diff --git a/.github/workflows/utitcase-spark-3.x.yml b/.github/workflows/utitcase-spark-3.x.yml
index 2406bf4a1055..cc8c4c561740 100644
--- a/.github/workflows/utitcase-spark-3.x.yml
+++ b/.github/workflows/utitcase-spark-3.x.yml
@@ -54,7 +54,8 @@ jobs:
distribution: 'temurin'
- name: Build Spark 3 with ${{ matrix.scala_version }}
- run: mvn -T 2C -B clean install -DskipTests -Pspark3,flink1,scala-${{ matrix.scala_version }}
+ run: mvn -T 2C -B clean install -DskipTests -Ppaimon-faiss,spark3,flink1,scala-${{ matrix.scala_version }}
+
- name: Test Spark 3 with ${{ matrix.scala_version }}
timeout-minutes: 60
@@ -68,6 +69,6 @@ jobs:
test_modules+="org.apache.paimon:paimon-spark-${suffix}_${{ matrix.scala_version }},"
done
test_modules="${test_modules%,}"
- mvn -T 2C -B verify -pl "${test_modules}" -Duser.timezone=$jvm_timezone -Pspark3,flink1,scala-${{ matrix.scala_version }}
+ mvn -T 2C -B verify -pl "${test_modules}" -Duser.timezone=$jvm_timezone -Ppaimon-faiss,spark3,flink1,scala-${{ matrix.scala_version }}
env:
MAVEN_OPTS: -Xmx4096m
\ No newline at end of file
diff --git a/.github/workflows/utitcase-spark-4.x.yml b/.github/workflows/utitcase-spark-4.x.yml
index 71938fd5814b..46257a620a91 100644
--- a/.github/workflows/utitcase-spark-4.x.yml
+++ b/.github/workflows/utitcase-spark-4.x.yml
@@ -51,7 +51,8 @@ jobs:
distribution: 'temurin'
- name: Build Spark
- run: mvn -T 2C -B clean install -DskipTests -Pspark4,flink1,paimon-lucene
+ run: mvn -T 2C -B clean install -DskipTests -Pspark4,flink1,paimon-lucene,paimon-faiss
+
- name: Test Spark
timeout-minutes: 60
@@ -65,6 +66,6 @@ jobs:
test_modules+="org.apache.paimon:paimon-spark-${suffix}_2.13,"
done
test_modules="${test_modules%,}"
- mvn -T 2C -B verify -pl "${test_modules}" -Duser.timezone=$jvm_timezone -Pspark4,flink1,paimon-lucene
+ mvn -T 2C -B verify -pl "${test_modules}" -Duser.timezone=$jvm_timezone -Pspark4,flink1,paimon-lucene,paimon-faiss
env:
MAVEN_OPTS: -Xmx4096m
\ No newline at end of file
diff --git a/paimon-faiss-jni/.github/workflows/build.yml b/paimon-faiss-jni/.github/workflows/build.yml
new file mode 100644
index 000000000000..879316624787
--- /dev/null
+++ b/paimon-faiss-jni/.github/workflows/build.yml
@@ -0,0 +1,290 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Builds the native library on four platforms, packages the JAR, runs the
+# tests and, for release events, publishes the artifacts.
+# NOTE(review): GitHub Actions only loads workflows from the repository-root
+# `.github/workflows/` directory; confirm this file is meant to live under
+# `paimon-faiss-jni/`, where it will not be triggered. The relative paths used
+# by the jobs (`scripts/`, `target/`) also assume that module is the checkout
+# root.
+name: Build and Test
+
+# Triggers: pushes and pull requests targeting main/master, and created releases.
+on:
+  push:
+    branches: [ main, master ]
+  pull_request:
+    branches: [ main, master ]
+  release:
+    types: [ created ]
+
+jobs:
+  # Builds the JNI library for Linux x86_64 on the hosted runner and uploads
+  # the contents of target/native/linux-x86_64/ as `native-linux-x86_64`.
+  build-native-linux-x86_64:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up JDK 8
+        uses: actions/setup-java@v4
+        with:
+          java-version: '8'
+          distribution: 'temurin'
+
+      # Faiss's CMake configuration requires BLAS and LAPACK (when MKL is not
+      # present) in addition to OpenMP, so OpenBLAS is installed explicitly
+      # rather than relying on whatever the runner image happens to ship.
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y cmake build-essential libomp-dev libopenblas-dev
+
+      # CPU-only release build of Faiss v1.7.4 (no GPU, Python bindings or
+      # tests), installed with `cmake --install`.
+      - name: Install Faiss
+        run: |
+          git clone --depth 1 --branch v1.7.4 https://github.com/facebookresearch/faiss.git
+          cd faiss
+          cmake -B build \
+            -DFAISS_ENABLE_GPU=OFF \
+            -DFAISS_ENABLE_PYTHON=OFF \
+            -DBUILD_TESTING=OFF \
+            -DCMAKE_BUILD_TYPE=Release
+          cmake --build build -j $(nproc)
+          sudo cmake --install build
+
+      - name: Build native library
+        run: |
+          chmod +x scripts/build-native.sh
+          ./scripts/build-native.sh --clean
+
+      - name: Upload native library
+        uses: actions/upload-artifact@v4
+        with:
+          name: native-linux-x86_64
+          path: target/native/linux-x86_64/
+
+  # Builds the JNI library for Linux aarch64 inside an ARM64 Ubuntu 22.04
+  # container (run through QEMU on the x86_64 runner) and uploads the contents
+  # of target/native/linux-aarch64/ as `native-linux-aarch64`.
+  build-native-linux-aarch64:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+        with:
+          platforms: arm64
+
+      - name: Build in ARM64 container
+        uses: uraimo/run-on-arch-action@v2
+        with:
+          arch: aarch64
+          distro: ubuntu22.04
+          # `install` provisions the container: toolchain, JDK 8 and a CPU-only
+          # release build of Faiss v1.7.4. libopenblas-dev supplies the BLAS and
+          # LAPACK libraries that Faiss's CMake configuration requires; none of
+          # the other packages listed here provide them.
+          # NOTE(review): confirm the distro's CMake package satisfies the
+          # cmake_minimum_required of Faiss v1.7.4.
+          install: |
+            apt-get update
+            apt-get install -y cmake build-essential libomp-dev libopenblas-dev git openjdk-8-jdk
+            git clone --depth 1 --branch v1.7.4 https://github.com/facebookresearch/faiss.git
+            cd faiss
+            cmake -B build \
+              -DFAISS_ENABLE_GPU=OFF \
+              -DFAISS_ENABLE_PYTHON=OFF \
+              -DBUILD_TESTING=OFF \
+              -DCMAKE_BUILD_TYPE=Release
+            cmake --build build -j $(nproc)
+            cmake --install build
+          # `run` executes in the provisioned container and builds the JNI library.
+          run: |
+            chmod +x scripts/build-native.sh
+            ./scripts/build-native.sh --clean
+
+      - name: Upload native library
+        uses: actions/upload-artifact@v4
+        with:
+          name: native-linux-aarch64
+          path: target/native/linux-aarch64/
+
+  # Builds the JNI library for macOS x86_64 and uploads the contents of
+  # target/native/osx-x86_64/ as `native-osx-x86_64`.
+  build-native-macos-x86_64:
+    runs-on: macos-13
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up JDK 8
+        uses: actions/setup-java@v4
+        with:
+          java-version: '8'
+          distribution: 'temurin'
+
+      - name: Install dependencies
+        run: |
+          brew install cmake libomp
+
+      # CPU-only release build of Faiss v1.7.4. Homebrew installs libomp
+      # keg-only (outside the default compiler search paths), so OpenMP_ROOT
+      # points CMake's FindOpenMP at the Homebrew prefix.
+      - name: Install Faiss
+        run: |
+          git clone --depth 1 --branch v1.7.4 https://github.com/facebookresearch/faiss.git
+          cd faiss
+          cmake -B build \
+            -DFAISS_ENABLE_GPU=OFF \
+            -DFAISS_ENABLE_PYTHON=OFF \
+            -DBUILD_TESTING=OFF \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DOpenMP_ROOT="$(brew --prefix libomp)"
+          cmake --build build -j $(sysctl -n hw.ncpu)
+          sudo cmake --install build
+
+      - name: Build native library
+        run: |
+          chmod +x scripts/build-native.sh
+          ./scripts/build-native.sh --clean
+
+      - name: Upload native library
+        uses: actions/upload-artifact@v4
+        with:
+          name: native-osx-x86_64
+          path: target/native/osx-x86_64/
+
+  # Builds the JNI library for macOS aarch64 and uploads the contents of
+  # target/native/osx-aarch64/ as `native-osx-aarch64`.
+  build-native-macos-aarch64:
+    runs-on: macos-14
+    steps:
+      - uses: actions/checkout@v4
+
+      # Temurin does not publish JDK 8 builds for macOS on Apple silicon, so
+      # this arm64 runner uses the Zulu distribution, which does.
+      - name: Set up JDK 8
+        uses: actions/setup-java@v4
+        with:
+          java-version: '8'
+          distribution: 'zulu'
+
+      - name: Install dependencies
+        run: |
+          brew install cmake libomp
+
+      # CPU-only release build of Faiss v1.7.4. Homebrew installs libomp
+      # keg-only (outside the default compiler search paths), so OpenMP_ROOT
+      # points CMake's FindOpenMP at the Homebrew prefix.
+      - name: Install Faiss
+        run: |
+          git clone --depth 1 --branch v1.7.4 https://github.com/facebookresearch/faiss.git
+          cd faiss
+          cmake -B build \
+            -DFAISS_ENABLE_GPU=OFF \
+            -DFAISS_ENABLE_PYTHON=OFF \
+            -DBUILD_TESTING=OFF \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DOpenMP_ROOT="$(brew --prefix libomp)"
+          cmake --build build -j $(sysctl -n hw.ncpu)
+          sudo cmake --install build
+
+      - name: Build native library
+        run: |
+          chmod +x scripts/build-native.sh
+          ./scripts/build-native.sh --clean
+
+      - name: Upload native library
+        uses: actions/upload-artifact@v4
+        with:
+          name: native-osx-aarch64
+          path: target/native/osx-aarch64/
+
+  # Waits for all four build-native-* jobs, restores each platform's native
+  # library under target/native/<platform>/, runs `mvn package` without tests
+  # and uploads the resulting JAR(s) as `paimon-faiss-jni-jar`.
+  package:
+    runs-on: ubuntu-latest
+    needs: [
+      build-native-linux-x86_64,
+      build-native-linux-aarch64,
+      build-native-macos-x86_64,
+      build-native-macos-aarch64,
+    ]
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up JDK 8
+        uses: actions/setup-java@v4
+        with:
+          distribution: "temurin"
+          java-version: "8"
+
+      # One download step per platform; artifact names and target directories
+      # match the upload steps of the corresponding build jobs.
+      - name: Download Linux x86_64 native library
+        uses: actions/download-artifact@v4
+        with:
+          path: target/native/linux-x86_64/
+          name: native-linux-x86_64
+
+      - name: Download Linux aarch64 native library
+        uses: actions/download-artifact@v4
+        with:
+          path: target/native/linux-aarch64/
+          name: native-linux-aarch64
+
+      - name: Download macOS x86_64 native library
+        uses: actions/download-artifact@v4
+        with:
+          path: target/native/osx-x86_64/
+          name: native-osx-x86_64
+
+      - name: Download macOS aarch64 native library
+        uses: actions/download-artifact@v4
+        with:
+          path: target/native/osx-aarch64/
+          name: native-osx-aarch64
+
+      # Diagnostic only: print up to 20 of the restored files to the job log.
+      - name: List native libraries
+        run: |
+          echo "Native libraries:"
+          find target/native -type f | head -20
+
+      - name: Build JAR
+        run: mvn package -DskipTests
+
+      - name: Upload JAR
+        uses: actions/upload-artifact@v4
+        with:
+          name: paimon-faiss-jni-jar
+          path: 'target/*.jar'
+
+  # Runs `mvn test` after packaging, on Linux and on the macos-14 runner,
+  # for JDK 8, 11 and 17.
+  test:
+    needs: package
+    strategy:
+      matrix:
+        os: [ubuntu-latest, macos-14]
+        java: ['8', '11', '17']
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v4
+
+      # Temurin does not publish JDK 8 builds for macOS on Apple silicon, so
+      # the macos-14 / Java 8 combination falls back to Zulu; every other
+      # combination keeps Temurin.
+      - name: Set up JDK ${{ matrix.java }}
+        uses: actions/setup-java@v4
+        with:
+          java-version: ${{ matrix.java }}
+          distribution: ${{ (matrix.os == 'macos-14' && matrix.java == '8') && 'zulu' || 'temurin' }}
+
+      # NOTE(review): `mvn test` builds and tests the checked-out sources;
+      # confirm that this downloaded JAR (and the native libraries it bundles)
+      # is actually what the tests load.
+      - name: Download JAR
+        uses: actions/download-artifact@v4
+        with:
+          name: paimon-faiss-jni-jar
+          path: target/
+
+      - name: Run tests
+        run: mvn test
+
+  # Deploys with the `release` Maven profile; runs only for release events,
+  # after the test job has succeeded.
+  publish:
+    needs: test
+    if: github.event_name == 'release'
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      # server-username, server-password and gpg-passphrase are the NAMES of
+      # environment variables; their values are supplied on the deploy step.
+      - name: Set up JDK 8
+        uses: actions/setup-java@v4
+        with:
+          java-version: '8'
+          distribution: 'temurin'
+          server-id: ossrh
+          server-username: MAVEN_USERNAME
+          server-password: MAVEN_PASSWORD
+          gpg-private-key: ${{ secrets.GPG_PRIVATE_KEY }}
+          gpg-passphrase: GPG_PASSPHRASE
+
+      # Each artifact holds the contents of one target/native/<platform>/
+      # directory, so restore them into the same per-platform layout the
+      # `package` job uses. A single `pattern` download with merge-multiple
+      # would flatten all four artifacts into target/native/ and let any
+      # same-named files from different platforms overwrite each other.
+      - name: Download Linux x86_64 native library
+        uses: actions/download-artifact@v4
+        with:
+          name: native-linux-x86_64
+          path: target/native/linux-x86_64/
+
+      - name: Download Linux aarch64 native library
+        uses: actions/download-artifact@v4
+        with:
+          name: native-linux-aarch64
+          path: target/native/linux-aarch64/
+
+      - name: Download macOS x86_64 native library
+        uses: actions/download-artifact@v4
+        with:
+          name: native-osx-x86_64
+          path: target/native/osx-x86_64/
+
+      - name: Download macOS aarch64 native library
+        uses: actions/download-artifact@v4
+        with:
+          name: native-osx-aarch64
+          path: target/native/osx-aarch64/
+
+      - name: Publish to Maven Central
+        run: mvn deploy -P release -DskipTests
+        env:
+          MAVEN_USERNAME: ${{ secrets.OSSRH_USERNAME }}
+          MAVEN_PASSWORD: ${{ secrets.OSSRH_TOKEN }}
+          GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
+
diff --git a/paimon-faiss-jni/.gitignore b/paimon-faiss-jni/.gitignore
new file mode 100644
index 000000000000..b1f2a22757d6
--- /dev/null
+++ b/paimon-faiss-jni/.gitignore
@@ -0,0 +1,44 @@
+# Maven
+target/
+pom.xml.tag
+pom.xml.releaseBackup
+pom.xml.versionsBackup
+pom.xml.next
+release.properties
+dependency-reduced-pom.xml
+buildNumber.properties
+.mvn/timing.properties
+.mvn/wrapper/maven-wrapper.jar
+
+# IDE
+.idea/
+*.iml
+*.ipr
+*.iws
+.project
+.classpath
+.settings/
+.vscode/
+*.swp
+*.swo
+*~
+
+# Native build
+build/
+cmake-build-*/
+CMakeFiles/
+CMakeCache.txt
+Makefile
+cmake_install.cmake
+
+# OS
+.DS_Store
+Thumbs.db
+
+# Logs
+*.log
+
+# Test output
+test-output/
+surefire-reports/
+
diff --git a/paimon-faiss-jni/LICENSE b/paimon-faiss-jni/LICENSE
new file mode 100644
index 000000000000..29a51a4393bf
--- /dev/null
+++ b/paimon-faiss-jni/LICENSE
@@ -0,0 +1,203 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to the Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no theory of
+ liability, whether in contract, strict liability, or tort
+ (including negligence or otherwise) arising in any way out of
+ the use or inability to use the Work (even if such Holder or other
+ party has been advised of the possibility of such damages), shall
+ any Contributor be liable to You for damages, including any direct,
+ indirect, special, incidental, or consequential damages of any
+ character arising as a result of this License or out of the use or
+ inability to use the Work (including but not limited to damages
+ for loss of goodwill, work stoppage, computer failure or malfunction,
+ or any and all other commercial damages or losses), even if such
+ Contributor has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
diff --git a/paimon-faiss-jni/NOTICE b/paimon-faiss-jni/NOTICE
new file mode 100644
index 000000000000..ec764026b4f7
--- /dev/null
+++ b/paimon-faiss-jni/NOTICE
@@ -0,0 +1,9 @@
+Apache Paimon Faiss JNI
+Copyright 2024 The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (https://www.apache.org/).
+
+This product includes Faiss (https://github.com/facebookresearch/faiss)
+developed by Facebook AI Research under the MIT License.
+
diff --git a/paimon-faiss-jni/README.md b/paimon-faiss-jni/README.md
new file mode 100644
index 000000000000..bd23ed173895
--- /dev/null
+++ b/paimon-faiss-jni/README.md
@@ -0,0 +1,265 @@
+# Paimon Faiss JNI
+
+Java bindings for [Faiss](https://github.com/facebookresearch/faiss) - A library for efficient similarity search and clustering of dense vectors.
+
+## Overview
+
+Paimon Faiss JNI provides a high-performance Java API for Faiss, following a similar architecture to [RocksDB Java](https://github.com/facebook/rocksdb/wiki/RocksJava-Basics). The library consists of:
+
+1. **Java API Layer** (`org.apache.paimon.faiss` package) - High-level Java classes for creating and managing Faiss indexes
+2. **JNI Bridge** - C++ code that connects Java to the native Faiss library
+3. **Native Libraries** - Pre-compiled Faiss libraries for different platforms, bundled in the JAR
+
+## Features
+
+- **Multiple Index Types**: Flat, IVF, HNSW, PQ, and more
+- **Cross-Platform**: Supports Linux (x86_64, aarch64) and macOS (x86_64, aarch64)
+- **Fat JAR**: All native libraries bundled in a single JAR
+- **Automatic Native Library Loading**: No manual library path configuration required
+- **Thread-Safe Loading**: Safe for use in multi-threaded applications
+
+## Quick Start
+
+### Maven Dependency
+
+```xml
+<dependency>
+    <groupId>org.apache.paimon</groupId>
+    <artifactId>paimon-faiss-jni</artifactId>
+    <version>0.1.0</version>
+</dependency>
+```
+
+### Basic Usage
+
+```java
+import org.apache.paimon.faiss.*;
+
+// Create a flat index for exact nearest neighbor search
+try (Index index = IndexFactory.createFlat(128, MetricType.L2)) {
+ // Add vectors (10000 vectors of dimension 128)
+ float[] vectors = new float[10000 * 128];
+ // ... fill vectors with data ...
+ index.add(vectors);
+
+ // Search for 10 nearest neighbors
+ float[] query = new float[128];
+ // ... fill query vector ...
+ SearchResult result = index.searchSingle(query, 10);
+
+ // Access results
+ for (int i = 0; i < 10; i++) {
+ long id = result.getLabel(0, i);
+ float distance = result.getDistance(0, i);
+ System.out.println("Neighbor " + i + ": id=" + id + ", distance=" + distance);
+ }
+}
+```
+
+## Index Types
+
+### Flat Index (Exact Search)
+
+```java
+// L2 distance (Euclidean)
+Index index = IndexFactory.createFlat(dimension, MetricType.L2);
+
+// Inner product (cosine similarity for normalized vectors)
+Index index = IndexFactory.createFlat(dimension, MetricType.INNER_PRODUCT);
+```
+
+### HNSW Index (Approximate Search)
+
+```java
+// Create HNSW index with M=32 neighbors
+Index index = IndexFactory.createHNSW(dimension, 32, MetricType.L2);
+
+// Configure search parameters
+IndexHNSW.setEfSearch(index, 64); // Higher = more accurate but slower
+```
+
+### IVF Index (Approximate Search with Training)
+
+```java
+// Create IVF index with 1000 clusters
+Index index = IndexFactory.createIVFFlat(dimension, 1000, MetricType.L2);
+
+// Train the index (required before adding vectors)
+index.train(trainingVectors);
+
+// Add vectors
+index.add(vectors);
+
+// Configure search
+IndexIVF.setNprobe(index, 10); // Search 10 out of 1000 clusters
+```
+
+### Custom Index Description
+
+```java
+// Use Faiss index factory syntax
+Index index = IndexFactory.create(dimension, "IVF1000,PQ16", MetricType.L2);
+```
+
+## Index Operations
+
+### Adding Vectors
+
+```java
+// Add multiple vectors
+float[] vectors = new float[n * dimension];
+index.add(vectors);
+
+// Add single vector
+float[] vector = new float[dimension];
+index.addSingle(vector);
+
+// Add with custom IDs (requires IDMap index)
+Index index = IndexFactory.createFlatWithIds(dimension, MetricType.L2);
+long[] ids = {100, 200, 300};
+index.addWithIds(vectors, ids);
+```
+
+### Searching
+
+```java
+// Single query, k neighbors
+SearchResult result = index.searchSingle(query, k);
+
+// Batch queries
+float[] queries = new float[numQueries * dimension];
+SearchResult result = index.search(queries, k);
+
+// Access results
+for (int q = 0; q < numQueries; q++) {
+ long[] labels = result.getLabelsForQuery(q);
+ float[] distances = result.getDistancesForQuery(q);
+}
+```
+
+### Persistence
+
+```java
+// Save to file
+index.writeToFile("/path/to/index.faiss");
+
+// Load from file
+Index loaded = Index.readFromFile("/path/to/index.faiss");
+
+// Serialize to byte array
+byte[] data = index.serialize();
+
+// Deserialize from byte array
+Index restored = Index.deserialize(data);
+```
+
+## Building from Source
+
+### Prerequisites
+
+- JDK 8 or later
+- Maven 3.6+
+- CMake 3.14+
+- C++ compiler with C++17 support
+- Faiss library (install via package manager or build from source)
+- OpenMP
+
+### Build Native Library
+
+```bash
+# Install Faiss (example for Ubuntu)
+sudo apt-get install libfaiss-dev
+
+# Or build Faiss from source
+git clone https://github.com/facebookresearch/faiss.git
+cd faiss
+cmake -B build -DFAISS_ENABLE_GPU=OFF -DFAISS_ENABLE_PYTHON=OFF
+cmake --build build -j
+sudo cmake --install build
+
+# Build native library
+./scripts/build-native.sh
+
+# Build Java JAR
+mvn package
+```
+
+### Cross-Platform Builds
+
+For cross-platform builds, use the GitHub Actions workflow or Docker:
+
+```bash
+# Build using Docker (Linux x86_64)
+docker run --rm -v $PWD:/work dockcross/linux-x64 bash -c \
+ "cd /work && ./scripts/build-native.sh"
+```
+
+## Configuration
+
+### Native Library Path
+
+By default, the library loads native libraries from the JAR. You can override this:
+
+```java
+// Use system property
+System.setProperty("paimon.faiss.lib.path", "/path/to/libpaimon_faiss_jni.so");
+
+// Or rely on java.library.path (a JVM command-line option, not Java code):
+java -Djava.library.path=/path/to/libs -jar myapp.jar
+```
+
+### Thread Count
+
+```java
+// Set number of threads for parallel operations
+Faiss.setNumThreads(4);
+
+// Get current thread count
+int threads = Faiss.getNumThreads();
+```
+
+## API Reference
+
+### Core Classes
+
+| Class | Description |
+|-------|-------------|
+| `Index` | Main index class for vector storage and search |
+| `IndexFactory` | Factory methods for creating different index types |
+| `SearchResult` | Container for k-NN search results |
+| `RangeSearchResult` | Container for range search results |
+| `MetricType` | Enum for L2 and Inner Product metrics |
+
+### Utility Classes
+
+| Class | Description |
+|-------|-------------|
+| `Faiss` | Global configuration (thread count, version) |
+| `IndexIVF` | IVF-specific operations (nprobe, nlist) |
+| `IndexHNSW` | HNSW-specific operations (efSearch) |
+| `NativeLibraryLoader` | Handles native library loading |
+
+## Performance Tips
+
+1. **Choose the right index type**:
+ - Small dataset (<10K vectors): Use `Flat`
+ - Medium dataset (10K-1M): Use `HNSW` or `IVFFlat`
+ - Large dataset (>1M): Use `IVFPQ` or `IVF+HNSW`
+
+2. **Batch operations**: Add and search in batches for better performance
+
+3. **Training**: For IVF indexes, use representative training data (typically 10-100x the number of clusters)
+
+4. **Tune parameters**:
+ - IVF: Increase `nprobe` for better recall
+ - HNSW: Increase `efSearch` for better recall
+
+## License
+
+Apache License 2.0
+
+## Acknowledgments
+
+- [Faiss](https://github.com/facebookresearch/faiss) by Facebook AI Research
+- [RocksDB Java](https://github.com/facebook/rocksdb) for the JNI architecture inspiration
+
diff --git a/paimon-faiss-jni/pom.xml b/paimon-faiss-jni/pom.xml
new file mode 100644
index 000000000000..3939f91a3cd9
--- /dev/null
+++ b/paimon-faiss-jni/pom.xml
@@ -0,0 +1,324 @@
+
+
+ 4.0.0
+
+ org.apache.paimon
+ paimon-faiss-jni
+ 1.4-SNAPSHOT
+ jar
+
+ Paimon Faiss JNI
+ Java bindings for Faiss - A library for efficient similarity search and clustering of dense vectors
+ https://github.com/apache/paimon
+
+
+
+ The Apache License, Version 2.0
+ https://www.apache.org/licenses/LICENSE-2.0.txt
+
+
+
+
+ UTF-8
+ 1.8
+ 1.8
+ 1.8
+ 5.10.0
+ 1.7.36
+
+
+
+
+
+ org.slf4j
+ slf4j-api
+ ${slf4j.version}
+
+
+
+
+ org.junit.jupiter
+ junit-jupiter
+ ${junit.version}
+ test
+
+
+ org.slf4j
+ slf4j-simple
+ ${slf4j.version}
+ test
+
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-compiler-plugin
+ 3.11.0
+
+ ${java.version}
+ ${java.version}
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-antrun-plugin
+ 3.1.0
+
+
+ generate-jni-headers
+ compile
+
+ run
+
+
+
+
+
+
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-jar-plugin
+ 3.3.0
+
+
+
+ true
+
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-resources-plugin
+ 3.3.1
+
+
+
+ so
+ dylib
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-shade-plugin
+ 3.5.1
+
+
+ package
+
+ shade
+
+
+ false
+
+
+ org.apache.paimon.faiss.FaissVersion
+
+ Paimon Faiss JNI
+
+
+
+
+
+ *:*
+
+ META-INF/*.SF
+ META-INF/*.DSA
+ META-INF/*.RSA
+
+
+
+
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-source-plugin
+ 3.3.0
+
+
+ attach-sources
+
+ jar-no-fork
+
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-javadoc-plugin
+ 3.6.0
+
+ none
+ false
+
+
+
+ attach-javadocs
+
+ jar
+
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-surefire-plugin
+ 3.1.2
+
+
+
+
+
+
+
+ release
+
+
+
+
+ org.apache.maven.plugins
+ maven-gpg-plugin
+ 3.1.0
+
+
+ sign-artifacts
+ verify
+
+ sign
+
+
+
+
+
+
+
+ org.sonatype.plugins
+ nexus-staging-maven-plugin
+ 1.6.13
+ true
+
+ ossrh
+ https://oss.sonatype.org/
+ true
+
+
+
+
+
+
+
+
+ linux-x86_64
+
+
+ unix
+ Linux
+ amd64
+
+
+
+ so
+ linux-x86_64
+
+
+
+
+
+ linux-aarch64
+
+
+ unix
+ Linux
+ aarch64
+
+
+
+ so
+ linux-aarch64
+
+
+
+
+
+ mac-x86_64
+
+
+ mac
+ x86_64
+
+
+
+ dylib
+ osx-x86_64
+
+
+
+
+
+ mac-aarch64
+
+
+ mac
+ aarch64
+
+
+
+ dylib
+ osx-aarch64
+
+
+
+
+
+
+
+ ossrh
+ https://oss.sonatype.org/content/repositories/snapshots
+
+
+ ossrh
+ https://oss.sonatype.org/service/local/staging/deploy/maven2/
+
+
+
+
+ scm:git:git://github.com/apache/paimon.git
+ scm:git:ssh://github.com:apache/paimon.git
+ https://github.com/apache/paimon/tree/master
+
+
+
+
+ Apache Paimon Team
+ dev@paimon.apache.org
+ Apache Paimon
+ https://paimon.apache.org/
+
+
+
+
diff --git a/paimon-faiss-jni/scripts/build-all-platforms.sh b/paimon-faiss-jni/scripts/build-all-platforms.sh
new file mode 100755
index 000000000000..4a40f7842d0e
--- /dev/null
+++ b/paimon-faiss-jni/scripts/build-all-platforms.sh
@@ -0,0 +1,407 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# This script builds native libraries for different platforms.
+#
+# Supported platforms:
+# - linux/amd64
+# - linux/aarch64
+# - darwin/amd64
+# - darwin/aarch64
+#
+# Usage:
+# ./build-all-platforms.sh # Build for current platform
+# ./build-all-platforms.sh --platform linux/amd64 # Build for specific platform (via Docker)
+# ./build-all-platforms.sh --all # Build for all platforms (requires Docker)
+# ./build-all-platforms.sh --list # List supported platforms
+#
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
+RESOURCES_DIR="$PROJECT_DIR/src/main/resources"
+
+# Supported platforms (Linux and macOS only)
+PLATFORMS=(
+ "linux/amd64"
+ "linux/aarch64"
+ "darwin/amd64"
+ "darwin/aarch64"
+)
+
+# Get Docker image for a platform
+get_docker_image() {
+ local platform="$1"
+ case "$platform" in
+ "linux/amd64")
+ echo "dockcross/linux-x64"
+ ;;
+ "linux/aarch64")
+ echo "dockcross/linux-arm64"
+ ;;
+ *)
+ echo ""
+ ;;
+ esac
+}
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+print_header() {
+ echo -e "${BLUE}================================================${NC}"
+ echo -e "${BLUE}$1${NC}"
+ echo -e "${BLUE}================================================${NC}"
+}
+
+# Print a green, check-marked status line for the message in $1.
+print_success() {
+    printf '%b\n' "${GREEN}✓ $1${NC}"
+}
+
+# Print a yellow warning line for the message in $1.
+print_warning() {
+    printf '%b\n' "${YELLOW}⚠ $1${NC}"
+}
+
+# Print a red error line for the message in $1.
+print_error() {
+    printf '%b\n' "${RED}✗ $1${NC}"
+}
+
+# Print the command-line usage: options, supported platforms and examples.
+show_help() {
+    printf '%s\n' \
+        "Usage: $0 [options]" \
+        "" \
+        "Options:" \
+        " --platform PLATFORM Build for specific platform (e.g., linux/amd64)" \
+        " --current Build for current platform only (default)" \
+        " --all Build for all supported platforms (requires Docker)" \
+        " --list List all supported platforms" \
+        " --clean Clean build directory before building" \
+        " --opt-level LEVEL Optimization level: generic, avx2, avx512 (default: generic)" \
+        " --help Show this help message" \
+        "" \
+        "Supported platforms:"
+
+    # One bullet per entry of the PLATFORMS array.
+    for platform in "${PLATFORMS[@]}"; do
+        printf '%s\n' " - $platform"
+    done
+
+    printf '%s\n' \
+        "" \
+        "Examples:" \
+        " $0 # Build for current platform" \
+        " $0 --platform linux/amd64 # Build for Linux x86_64 (via Docker)" \
+        " $0 --all # Build for all platforms"
+}
+
+# Print every supported platform with the native library file name and the
+# resources directory that holds its build output.
+list_platforms() {
+    # Keep the loop state local so it neither leaks into nor picks up values
+    # from the caller's scope.
+    local platform os arch lib_ext
+
+    echo "Supported platforms:"
+    echo ""
+    for platform in "${PLATFORMS[@]}"; do
+        # Split "os/arch" on the slash.
+        IFS=/ read -r os arch _ <<< "$platform"
+
+        case "$os" in
+            linux)
+                lib_ext="so"
+                ;;
+            darwin)
+                lib_ext="dylib"
+                ;;
+            *)
+                # Never reuse the previous entry's extension for an unknown OS.
+                lib_ext="unknown"
+                ;;
+        esac
+
+        echo " $platform"
+        echo " Library: libpaimon_faiss_jni.$lib_ext"
+        echo " Output: src/main/resources/$os/$arch/"
+        echo ""
+    done
+}
+
+get_current_platform() {
+ local os arch
+
+ case "$(uname -s)" in
+ Linux)
+ os="linux"
+ ;;
+ Darwin)
+ os="darwin"
+ ;;
+ *)
+ print_error "Unsupported OS: $(uname -s). Only Linux and macOS are supported."
+ exit 1
+ ;;
+ esac
+
+ case "$(uname -m)" in
+ x86_64|amd64)
+ arch="amd64"
+ ;;
+ aarch64|arm64)
+ arch="aarch64"
+ ;;
+ *)
+ print_error "Unsupported architecture: $(uname -m)"
+ exit 1
+ ;;
+ esac
+
+ echo "$os/$arch"
+}
+
+# Build the native library for the host platform by running build-native.sh.
+#   $1 - optimization level; forwarded as --opt-level when non-empty
+#   $2 - "true" to forward --clean
+build_current_platform() {
+    local level="$1"
+    local do_clean="$2"
+    local native_args=()
+
+    print_header "Building for current platform: $(get_current_platform)"
+
+    [ -z "$level" ] || native_args+=(--opt-level "$level")
+    [ "$do_clean" != true ] || native_args+=(--clean)
+
+    "$SCRIPT_DIR/build-native.sh" "${native_args[@]}"
+}
+
+build_with_docker() {
+ local platform="$1"
+ local opt_level="$2"
+ local clean="$3"
+
+ local docker_image
+ docker_image=$(get_docker_image "$platform")
+
+ if [ -z "$docker_image" ]; then
+ print_error "No Docker image available for platform: $platform"
+ print_warning "This platform must be built natively on the target OS."
+ return 1
+ fi
+
+ print_header "Building for $platform using Docker"
+ echo "Using image: $docker_image"
+ echo ""
+
+ # Check if Docker is available
+ if ! command -v docker &> /dev/null; then
+ print_error "Docker is not installed or not in PATH"
+ exit 1
+ fi
+
+ # Pull the image if needed
+ echo "Pulling Docker image..."
+ docker pull "$docker_image"
+
+ # Create build script for container
+ local build_script="/tmp/build-in-docker.sh"
+ cat > "$build_script" << 'DOCKER_SCRIPT'
+#!/bin/bash
+set -e
+cd /work
+mkdir -p build/native
+cd build/native
+cmake -DCMAKE_BUILD_TYPE=Release ../../src/main/native
+cmake --build . --config Release -j $(nproc)
+DOCKER_SCRIPT
+ chmod +x "$build_script"
+
+ # Run build in container
+ docker run --rm \
+ -v "$PROJECT_DIR:/work" \
+ -v "$build_script:/build.sh:ro" \
+ "$docker_image" \
+ bash /build.sh
+
+ rm -f "$build_script"
+
+ print_success "Build completed for $platform"
+}
+
+# Build one platform: natively when it is the host platform, otherwise
+# through Docker.
+#   $1 - target platform, $2 - optimization level, $3 - clean flag
+build_platform() {
+    local target="$1"
+    local level="$2"
+    local do_clean="$3"
+    local host
+
+    host=$(get_current_platform)
+
+    if [ "$target" != "$host" ]; then
+        build_with_docker "$target" "$level" "$do_clean"
+        return
+    fi
+
+    build_current_platform "$level" "$do_clean"
+}
+
+# Build every entry of PLATFORMS: the host platform natively, all others
+# through Docker. Docker failures are collected and listed in the summary
+# instead of stopping the run.
+#   $1 - optimization level, $2 - clean flag
+build_all_platforms() {
+    local level="$1"
+    local do_clean="$2"
+    local host
+    local unbuilt=()
+
+    host=$(get_current_platform)
+
+    print_header "Building for All Platforms"
+    printf '\n%s\n\n' "Current platform: $host"
+
+    for platform in "${PLATFORMS[@]}"; do
+        printf '\n%s\n' "----------------------------------------"
+
+        if [ "$platform" != "$host" ]; then
+            build_with_docker "$platform" "$level" "$do_clean" || unbuilt+=("$platform")
+        else
+            build_current_platform "$level" "$do_clean"
+        fi
+    done
+
+    echo ""
+    print_header "Build Summary"
+    echo ""
+
+    # List every native library currently present under the resources tree.
+    echo "Native libraries in $RESOURCES_DIR:"
+    find "$RESOURCES_DIR" -type f \( -name "*.so" -o -name "*.dylib" \) 2>/dev/null | while read -r lib; do
+        echo " - $lib"
+    done
+
+    if [ "${#unbuilt[@]}" -eq 0 ]; then
+        return 0
+    fi
+
+    echo ""
+    print_warning "Some platforms could not be built:"
+    for p in "${unbuilt[@]}"; do
+        echo " - $p (build natively on target platform)"
+    done
+}
+
+show_native_libs() {
+ echo ""
+ echo "Native libraries in resources directory:"
+ echo ""
+
+ for platform in "${PLATFORMS[@]}"; do
+ os=$(echo "$platform" | cut -d'/' -f1)
+ arch=$(echo "$platform" | cut -d'/' -f2)
+ dir="$RESOURCES_DIR/$os/$arch"
+
+ if [ -d "$dir" ]; then
+ libs=$(find "$dir" -type f \( -name "*.so" -o -name "*.dylib" \) 2>/dev/null)
+ if [ -n "$libs" ]; then
+ print_success "$platform:"
+ echo "$libs" | while read -r lib; do
+ size=$(ls -lh "$lib" | awk '{print $5}')
+ echo " $(basename "$lib") ($size)"
+ done
+ else
+ print_warning "$platform: (not built)"
+ fi
+ else
+ print_warning "$platform: (directory not found)"
+ fi
+ done
+}
+
+# Parse arguments
+PLATFORM=""
+BUILD_ALL=false
+# Set by --current. The dispatch code further down never reads it, because
+# building the current platform is already the default action.
+BUILD_CURRENT=false
+LIST_ONLY=false
+CLEAN=false
+OPT_LEVEL="generic"
+
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --platform|--opt-level)
+            # Both options take a value. Without this check a trailing
+            # "--platform" makes `shift 2` fail and `set -e` ends the script
+            # without any message.
+            if [[ $# -lt 2 ]]; then
+                print_error "Option $1 requires a value"
+                echo ""
+                show_help
+                exit 1
+            fi
+            if [[ $1 == "--platform" ]]; then
+                PLATFORM="$2"
+            else
+                OPT_LEVEL="$2"
+            fi
+            shift 2
+            ;;
+        --current)
+            BUILD_CURRENT=true
+            shift
+            ;;
+        --all)
+            BUILD_ALL=true
+            shift
+            ;;
+        --list)
+            LIST_ONLY=true
+            shift
+            ;;
+        --clean)
+            CLEAN=true
+            shift
+            ;;
+        --help|-h)
+            show_help
+            exit 0
+            ;;
+        *)
+            print_error "Unknown option: $1"
+            echo ""
+            show_help
+            exit 1
+            ;;
+    esac
+done
+
+# Run the requested action. --list only reports and exits; every other mode
+# builds first and then prints the libraries that are present.
+if [ "$LIST_ONLY" = true ]; then
+    list_platforms
+    show_native_libs
+    exit 0
+fi
+
+if [ "$BUILD_ALL" = true ]; then
+    build_all_platforms "$OPT_LEVEL" "$CLEAN"
+elif [ -z "$PLATFORM" ]; then
+    # No explicit target: build for the platform we are running on.
+    build_current_platform "$OPT_LEVEL" "$CLEAN"
+else
+    # Accept only platforms listed in PLATFORMS.
+    valid=false
+    for p in "${PLATFORMS[@]}"; do
+        if [ "$p" = "$PLATFORM" ]; then
+            valid=true
+            break
+        fi
+    done
+
+    if [ "$valid" != true ]; then
+        print_error "Invalid platform: $PLATFORM"
+        echo ""
+        list_platforms
+        exit 1
+    fi
+
+    build_platform "$PLATFORM" "$OPT_LEVEL" "$CLEAN"
+fi
+
+echo ""
+show_native_libs
+printf '%s\n' \
+    "" \
+    "To package the JAR with all native libraries, run:" \
+    " mvn package"
diff --git a/paimon-faiss-jni/scripts/build-native.sh b/paimon-faiss-jni/scripts/build-native.sh
new file mode 100755
index 000000000000..2679f1388116
--- /dev/null
+++ b/paimon-faiss-jni/scripts/build-native.sh
@@ -0,0 +1,335 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e
+
+# Paths are resolved relative to this script: the project root is the
+# directory above scripts/.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
+NATIVE_DIR="$PROJECT_DIR/src/main/native"
+BUILD_DIR="$PROJECT_DIR/build/native"
+
+# Parse arguments
+OPT_LEVEL="generic"
+CLEAN=false
+FAT_LIB=true # Default to fat lib
+
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --opt-level)
+            # The option takes a value. Without this check a trailing
+            # "--opt-level" makes `shift 2` fail and `set -e` ends the
+            # script without any message.
+            if [[ $# -lt 2 ]]; then
+                echo "Option --opt-level requires a value" >&2
+                exit 1
+            fi
+            OPT_LEVEL="$2"
+            shift 2
+            ;;
+        --clean)
+            CLEAN=true
+            shift
+            ;;
+        --fat-lib)
+            FAT_LIB=true
+            shift
+            ;;
+        --no-fat-lib)
+            FAT_LIB=false
+            shift
+            ;;
+        --help)
+            echo "Usage: $0 [options]"
+            echo ""
+            echo "Options:"
+            echo " --opt-level LEVEL Optimization level: generic, avx2, avx512 (default: generic)"
+            echo " --fat-lib Build fat library with all dependencies (default: enabled)"
+            echo " --no-fat-lib Build without bundling dependencies"
+            echo " --clean Clean build directory before building"
+            echo " --help Show this help message"
+            echo ""
+            echo "Environment variables:"
+            echo " FAISS_ROOT Path to Faiss installation"
+            echo " JAVA_HOME Path to Java installation"
+            echo " OPENBLAS_ROOT Path to OpenBLAS installation"
+            echo ""
+            echo "Example:"
+            echo " FAISS_ROOT=/opt/faiss $0 --clean --fat-lib"
+            exit 0
+            ;;
+        *)
+            echo "Unknown option: $1"
+            exit 1
+            ;;
+    esac
+done
+
+# Banner with the effective settings.
+printf '%s\n' \
+    "================================================" \
+    "Building Paimon Faiss JNI - Native Library" \
+    "================================================" \
+    "Optimization level: $OPT_LEVEL" \
+    "Fat library: $FAT_LIB" \
+    ""
+
+# Start from an empty build directory when --clean was given.
+if [ "$CLEAN" = true ]; then
+    echo "Cleaning build directory..."
+    rm -rf "$BUILD_DIR"
+fi
+
+# Everything below runs from inside the build directory.
+mkdir -p "$BUILD_DIR"
+cd "$BUILD_DIR"
+
+# A CMake cache that records a different source directory (for example a
+# build tree copied from another machine) is discarded before configuring.
+if [ -f "CMakeCache.txt" ]; then
+    CACHED_SOURCE=$(grep "CMAKE_HOME_DIRECTORY:INTERNAL=" CMakeCache.txt 2>/dev/null | cut -d'=' -f2)
+    if [ -n "$CACHED_SOURCE" ] && [ "$CACHED_SOURCE" != "$NATIVE_DIR" ]; then
+        printf '%s\n' \
+            "Detected CMake cache from different source directory." \
+            " Cached: $CACHED_SOURCE" \
+            " Current: $NATIVE_DIR" \
+            "Cleaning build directory to avoid conflicts..."
+        rm -rf "$BUILD_DIR"/*
+    fi
+fi
+
+# Host OS and CPU architecture as reported by uname.
+OS=$(uname -s)
+ARCH=$(uname -m)
+
+echo "Detected platform: $OS $ARCH"
+
+# macOS only: make sure libomp is available, installing it through Homebrew
+# when it is missing.
+if [ "$OS" = "Darwin" ]; then
+    if brew list libomp &>/dev/null; then
+        echo "Found libomp via Homebrew"
+    else
+        printf '%s\n' \
+            "" \
+            "WARNING: libomp not found. Installing via Homebrew..." \
+            "Run: brew install libomp" \
+            "" \
+            "If you don't have Homebrew, install it from https://brew.sh" \
+            "Or install libomp manually and set OPENMP_ROOT environment variable." \
+            ""
+
+        # Install automatically when Homebrew exists; otherwise give up.
+        if ! command -v brew &>/dev/null; then
+            echo "ERROR: Homebrew not found. Please install libomp manually."
+            exit 1
+        fi
+        brew install libomp
+    fi
+fi
+
+# Configure the native build with CMake, then compile it. The working
+# directory is still $BUILD_DIR, so the build tree is generated there.
+echo ""
+echo "Configuring with CMake..."
+
+# Options passed on every platform: a Release build, the requested Faiss
+# optimization level and whether dependencies are bundled (fat library).
+CMAKE_ARGS=(
+ -DCMAKE_BUILD_TYPE=Release
+ -DFAISS_OPT_LEVEL="$OPT_LEVEL"
+ -DBUILD_FAT_LIB="$FAT_LIB"
+)
+
+# Add platform-specific options
+if [ "$OS" = "Darwin" ]; then
+ # Forward an explicitly selected macOS SDK, if the environment names one.
+ if [ -n "$SDKROOT" ]; then
+ CMAKE_ARGS+=(-DCMAKE_OSX_SYSROOT="$SDKROOT")
+ fi
+
+ # On Apple Silicon build for arm64 only (this is not a universal binary).
+ if [ "$ARCH" = "arm64" ]; then
+ CMAKE_ARGS+=(-DCMAKE_OSX_ARCHITECTURES="arm64")
+ fi
+fi
+
+# If FAISS_ROOT is set, pass it to CMake
+if [ -n "$FAISS_ROOT" ]; then
+ CMAKE_ARGS+=(-DFAISS_ROOT="$FAISS_ROOT")
+ echo "Using FAISS_ROOT: $FAISS_ROOT"
+fi
+
+# If OPENBLAS_ROOT is set, pass it to CMake
+if [ -n "$OPENBLAS_ROOT" ]; then
+ CMAKE_ARGS+=(-DOPENBLAS_ROOT="$OPENBLAS_ROOT")
+ echo "Using OPENBLAS_ROOT: $OPENBLAS_ROOT"
+fi
+
+# If JAVA_HOME is set, pass it to CMake
+if [ -n "$JAVA_HOME" ]; then
+ CMAKE_ARGS+=(-DJAVA_HOME="$JAVA_HOME")
+ echo "Using JAVA_HOME: $JAVA_HOME"
+fi
+
+# Configure: generate the build system for the sources in $NATIVE_DIR.
+cmake "${CMAKE_ARGS[@]}" "$NATIVE_DIR"
+
+# Build
+echo ""
+echo "Building..."
+# Job count: nproc where it exists, otherwise sysctl's hw.ncpu.
+cmake --build . --config Release -j "$(nproc 2>/dev/null || sysctl -n hw.ncpu)"
+
+# Only reached when configure and build succeeded (the script runs under set -e).
+echo ""
+echo "============================================"
+echo "Build completed successfully!"
+echo "============================================"
+
+# Determine output directory based on platform
+if [ "$OS" = "Linux" ]; then
+ PLATFORM_OS="linux"
+ if [ "$ARCH" = "x86_64" ] || [ "$ARCH" = "amd64" ]; then
+ PLATFORM_ARCH="amd64"
+ else
+ PLATFORM_ARCH="aarch64"
+ fi
+elif [ "$OS" = "Darwin" ]; then
+ PLATFORM_OS="darwin"
+ if [ "$ARCH" = "arm64" ]; then
+ PLATFORM_ARCH="aarch64"
+ else
+ PLATFORM_ARCH="amd64"
+ fi
+fi
+OUTPUT_DIR="$PROJECT_DIR/src/main/resources/$PLATFORM_OS/$PLATFORM_ARCH"
+
+# Bundle dependency libraries if building fat lib and they're dynamically linked.
+# Linux only: copies selected shared-library dependencies of the JNI library
+# next to it in $OUTPUT_DIR and then points every library's rpath at $ORIGIN.
+if [ "$FAT_LIB" = true ] && [ "$OS" = "Linux" ]; then
+ echo ""
+ echo "Checking for dynamic dependencies to bundle..."
+
+ JNI_LIB="$OUTPUT_DIR/libpaimon_faiss_jni.so"
+ # Nothing is bundled when the JNI library is not present in $OUTPUT_DIR.
+ if [ -f "$JNI_LIB" ]; then
+ # Copy one library into $OUTPUT_DIR.
+ #   $1 - path of the library as resolved by ldd
+ #   $2 - file name to store it under
+ # Returns 1 when the path is empty or not a regular file; returns 0 when
+ # the library was copied or a file with the target name already exists.
+ bundle_lib() {
+ local lib_path="$1"
+ local target_name="$2"
+
+ if [ -z "$lib_path" ] || [ ! -f "$lib_path" ]; then
+ return 1
+ fi
+
+ # Resolve symlinks so the real file is copied, not a link to it.
+ local real_path=$(readlink -f "$lib_path")
+ if [ -f "$OUTPUT_DIR/$target_name" ]; then
+ echo " Already bundled: $target_name"
+ return 0
+ fi
+
+ cp "$real_path" "$OUTPUT_DIR/$target_name"
+ chmod +x "$OUTPUT_DIR/$target_name"
+ echo " Bundled: $real_path -> $target_name"
+ return 0
+ }
+
+ # Worklist of libraries whose dependencies still have to be inspected,
+ # kept as a space-separated string and seeded with the JNI library.
+ # NOTE(review): paths containing spaces would be split incorrectly.
+ LIBS_TO_CHECK="$JNI_LIB"
+ LIBS_CHECKED=""
+
+ while [ -n "$LIBS_TO_CHECK" ]; do
+ # Pop the first entry of the worklist.
+ CURRENT_LIB=$(echo "$LIBS_TO_CHECK" | awk '{print $1}')
+ LIBS_TO_CHECK=$(echo "$LIBS_TO_CHECK" | cut -d' ' -f2-)
+ # With a single entry, cut returns the input unchanged; treat that as empty.
+ [ "$LIBS_TO_CHECK" = "$CURRENT_LIB" ] && LIBS_TO_CHECK=""
+
+ # Skip if already checked
+ # NOTE(review): grep treats the path as a regular expression and matches
+ # substrings; `grep -qF` would be a stricter comparison.
+ echo "$LIBS_CHECKED" | grep -q "$CURRENT_LIB" && continue
+ LIBS_CHECKED="$LIBS_CHECKED $CURRENT_LIB"
+
+ echo "Checking dependencies of: $(basename "$CURRENT_LIB")"
+
+ # Get all dependencies as "<name> <resolved path>" pairs, taken from the
+ # ldd output lines that contain "=>".
+ DEPS=$(ldd "$CURRENT_LIB" 2>/dev/null | grep "=>" | awk '{print $1 " " $3}')
+
+ while IFS= read -r dep_line; do
+ [ -z "$dep_line" ] && continue
+ DEP_NAME=$(echo "$dep_line" | awk '{print $1}')
+ DEP_PATH=$(echo "$dep_line" | awk '{print $2}')
+
+ # Skip system libraries that are universally available
+ case "$DEP_NAME" in
+ linux-vdso.so*|libc.so*|libm.so*|libpthread.so*|libdl.so*|librt.so*|ld-linux*)
+ continue
+ ;;
+ esac
+
+ # Bundle specific libraries we know are problematic
+ case "$DEP_NAME" in
+ libopenblas*)
+ # Stored under a fixed name; the copy is queued so its own
+ # dependencies are inspected as well.
+ if bundle_lib "$DEP_PATH" "libopenblas.so.0"; then
+ LIBS_TO_CHECK="$LIBS_TO_CHECK $OUTPUT_DIR/libopenblas.so.0"
+ fi
+ ;;
+ libgfortran*)
+ # Keep the original versioned name
+ bundle_lib "$DEP_PATH" "$DEP_NAME"
+ ;;
+ libgomp*)
+ bundle_lib "$DEP_PATH" "libgomp.so.1"
+ ;;
+ libquadmath*)
+ bundle_lib "$DEP_PATH" "$DEP_NAME"
+ ;;
+ libgcc_s*)
+ bundle_lib "$DEP_PATH" "$DEP_NAME"
+ ;;
+ libblas*|liblapack*)
+ bundle_lib "$DEP_PATH" "$DEP_NAME"
+ ;;
+ esac
+ done <<< "$DEPS"
+ done
+
+ # Set rpath to $ORIGIN for all bundled libraries, so each library looks
+ # for its dependencies in its own directory. patchelf failures are ignored.
+ if command -v patchelf &>/dev/null; then
+ echo ""
+ echo "Setting rpath to \$ORIGIN for all libraries..."
+ for lib in "$OUTPUT_DIR"/*.so*; do
+ if [ -f "$lib" ]; then
+ patchelf --set-rpath '$ORIGIN' "$lib" 2>/dev/null || true
+ fi
+ done
+ echo "Done setting rpath"
+ else
+ echo ""
+ echo "WARNING: patchelf not found, cannot set rpath"
+ echo "Install with: sudo apt-get install patchelf"
+ fi
+ fi
+fi
+
+# Final report: list every native library under src/main/resources together
+# with its dynamic dependencies.
+echo ""
+echo "Native library location:"
+BUILT_LIBS=$(find "$PROJECT_DIR/src/main/resources" -type f \( -name "*.so" -o -name "*.so.*" -o -name "*.dylib" \) 2>/dev/null)
+
+if [ -n "$BUILT_LIBS" ]; then
+    # Read one path per line so paths containing spaces stay intact
+    # (an unquoted `for lib in $BUILT_LIBS` would split them).
+    while IFS= read -r lib; do
+        echo ""
+        echo "Library: $lib"
+        ls -la "$lib"
+
+        # Show library dependencies
+        echo ""
+        echo "Dependencies:"
+        if [ "$OS" = "Darwin" ]; then
+            otool -L "$lib" 2>/dev/null | head -20 || true
+        elif [ "$OS" = "Linux" ]; then
+            # Group the fallback before piping into head: a pipeline reports
+            # the status of its last command, so with `ldd | head || readelf`
+            # the readelf fallback could never run.
+            { ldd "$lib" 2>/dev/null || readelf -d "$lib" 2>/dev/null | grep NEEDED; } | head -20 || true
+        fi
+    done <<< "$BUILT_LIBS"
+else
+    echo " (no libraries found)"
+    ls -la "$PROJECT_DIR/src/main/resources/"*/*/ 2>/dev/null || true
+fi
+
+echo ""
+echo "To package the JAR with native libraries, run:"
+echo " mvn package"
+
diff --git a/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/Faiss.java b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/Faiss.java
new file mode 100644
index 000000000000..b128c8b3478f
--- /dev/null
+++ b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/Faiss.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss;
+
+/**
+ * Global Faiss configuration and utilities.
+ *
+ * <p>This class provides methods for configuring Faiss globally, such as
+ * setting the number of threads for parallel operations.
+ *
+ * <p>Example usage:
+ *
+ * <pre>{@code
+ * // Set the number of threads for Faiss operations
+ * Faiss.setNumThreads(4);
+ *
+ * // Get the Faiss version
+ * String version = Faiss.getVersion();
+ * }</pre>
+ */
+public final class Faiss {
+
+    // Deliberately no static initializer that loads the native library.
+    // Loading during class initialization turns a failure into an
+    // ExceptionInInitializerError, so loadLibrary() could never report its
+    // documented FaissException and isLibraryLoaded() could never return
+    // false. The native calls below still load the library on first use,
+    // through the static initializer of FaissNative.
+
+    private Faiss() {
+        // Static utility class
+    }
+
+    /**
+     * Get the version of the Faiss library.
+     *
+     * @return the version string
+     */
+    public static String getVersion() {
+        return FaissNative.getVersion();
+    }
+
+    /**
+     * Set the number of threads for parallel operations.
+     *
+     * <p>This affects operations like index training, adding vectors,
+     * and searching. Set to 1 to disable parallelism.
+     *
+     * @param numThreads the number of threads (must be positive)
+     * @throws IllegalArgumentException if {@code numThreads} is zero or negative
+     */
+    public static void setNumThreads(int numThreads) {
+        if (numThreads <= 0) {
+            throw new IllegalArgumentException("Number of threads must be positive: " + numThreads);
+        }
+        FaissNative.setNumThreads(numThreads);
+    }
+
+    /**
+     * Get the number of threads for parallel operations.
+     *
+     * @return the current number of threads
+     */
+    public static int getNumThreads() {
+        return FaissNative.getNumThreads();
+    }
+
+    /**
+     * Ensure the native library is loaded.
+     *
+     * <p>The library is also loaded automatically the first time a native
+     * Faiss method is used. Call this method explicitly to load the library
+     * early and to receive loading problems as a {@link FaissException}.
+     *
+     * @throws FaissException if the native library cannot be loaded
+     */
+    public static void loadLibrary() throws FaissException {
+        NativeLibraryLoader.load();
+    }
+
+    /**
+     * Check if the native library has been loaded.
+     *
+     * <p>This method does not itself trigger loading.
+     *
+     * @return true if the library is loaded
+     */
+    public static boolean isLibraryLoaded() {
+        return NativeLibraryLoader.isLoaded();
+    }
+}
+
diff --git a/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/FaissException.java b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/FaissException.java
new file mode 100644
index 000000000000..cd20e2eea0d0
--- /dev/null
+++ b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/FaissException.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss;
+
+/**
+ * Exception thrown when a Faiss operation fails.
+ *
+ * <p>This exception wraps errors from the native Faiss library as well as
+ * errors that occur during JNI operations.
+ *
+ * <p>It extends {@link Exception} directly, so it is a checked exception.
+ */
+public class FaissException extends Exception {
+ // Explicit serialization version id for this serializable exception type.
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * Creates a new FaissException with the specified message.
+ *
+ * @param message the error message
+ */
+ public FaissException(String message) {
+ super(message);
+ }
+
+ /**
+ * Creates a new FaissException with the specified message and cause.
+ *
+ * @param message the error message
+ * @param cause the underlying cause
+ */
+ public FaissException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ /**
+ * Creates a new FaissException with the specified cause.
+ *
+ * @param cause the underlying cause
+ */
+ public FaissException(Throwable cause) {
+ super(cause);
+ }
+}
+
diff --git a/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/FaissNative.java b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/FaissNative.java
new file mode 100644
index 000000000000..ea07d5ac2eaf
--- /dev/null
+++ b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/FaissNative.java
@@ -0,0 +1,307 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss;
+
+/**
+ * Native method declarations for Faiss JNI.
+ *
+ * <p>This class contains all the native method declarations that are implemented
+ * in the JNI C++ layer. These methods directly map to Faiss C++ API calls.
+ *
+ * <p>Users should not call these methods directly. Instead, use the high-level
+ * Java API classes like {@link Index} and {@link IndexFactory}.
+ *
+ * <p>The class is package-private and cannot be instantiated. Indexes and range
+ * search results are referred to by {@code long} native handles.
+ */
+final class FaissNative {
+
+ // Load the native library when this class is initialized. A loading failure
+ // is rethrown as ExceptionInInitializerError with the FaissException as cause.
+ static {
+ try {
+ NativeLibraryLoader.load();
+ } catch (FaissException e) {
+ throw new ExceptionInInitializerError(e);
+ }
+ }
+
+ private FaissNative() {
+ // Static utility class
+ }
+
+ // ==================== Index Factory ====================
+
+ /**
+ * Create an index using an index factory string.
+ *
+ * @param dimension the dimension of the vectors
+ * @param description the index description string (e.g., "Flat", "IVF100,Flat", "HNSW32")
+ * @param metricType the metric type (0 = L2, 1 = Inner Product)
+ * @return the native handle of the created index
+ */
+ static native long indexFactoryCreate(int dimension, String description, int metricType);
+
+ // ==================== Index Operations ====================
+
+ /**
+ * Destroy an index and free its resources.
+ *
+ * @param handle the native handle of the index
+ */
+ static native void indexDestroy(long handle);
+
+ /**
+ * Get the dimension of an index.
+ *
+ * @param handle the native handle of the index
+ * @return the dimension
+ */
+ static native int indexGetDimension(long handle);
+
+ /**
+ * Get the number of vectors in an index.
+ *
+ * @param handle the native handle of the index
+ * @return the number of vectors
+ */
+ static native long indexGetCount(long handle);
+
+ /**
+ * Check if an index is trained.
+ *
+ * @param handle the native handle of the index
+ * @return true if trained
+ */
+ static native boolean indexIsTrained(long handle);
+
+ /**
+ * Get the metric type of an index.
+ *
+ * @param handle the native handle of the index
+ * @return the metric type (0 = L2, 1 = Inner Product)
+ */
+ static native int indexGetMetricType(long handle);
+
+ /**
+ * Train an index on a set of training vectors.
+ *
+ * @param handle the native handle of the index
+ * @param n the number of training vectors
+ * @param vectors the training vectors (n * dimension floats)
+ */
+ static native void indexTrain(long handle, long n, float[] vectors);
+
+ /**
+ * Add vectors to an index.
+ *
+ * @param handle the native handle of the index
+ * @param n the number of vectors to add
+ * @param vectors the vectors to add (n * dimension floats)
+ */
+ static native void indexAdd(long handle, long n, float[] vectors);
+
+ /**
+ * Add vectors with IDs to an index.
+ *
+ * @param handle the native handle of the index
+ * @param n the number of vectors to add
+ * @param vectors the vectors to add (n * dimension floats)
+ * @param ids the IDs for the vectors (n longs)
+ */
+ static native void indexAddWithIds(long handle, long n, float[] vectors, long[] ids);
+
+ /**
+ * Search for the k nearest neighbors of query vectors.
+ *
+ * @param handle the native handle of the index
+ * @param n the number of query vectors
+ * @param queries the query vectors (n * dimension floats)
+ * @param k the number of nearest neighbors to find
+ * @param distances output array for distances (n * k floats)
+ * @param labels output array for labels/IDs (n * k longs)
+ */
+ static native void indexSearch(
+ long handle, long n, float[] queries, int k, float[] distances, long[] labels);
+
+ /**
+ * Search for neighbors within a given radius.
+ *
+ * @param handle the native handle of the index
+ * @param n the number of query vectors
+ * @param queries the query vectors (n * dimension floats)
+ * @param radius the search radius
+ * @return a range search result handle
+ */
+ static native long indexRangeSearch(long handle, long n, float[] queries, float radius);
+
+ /**
+ * Remove vectors by IDs from an index.
+ *
+ * @param handle the native handle of the index
+ * @param ids the IDs to remove
+ * @return the number of vectors removed
+ */
+ static native long indexRemoveIds(long handle, long[] ids);
+
+ /**
+ * Reset an index (remove all vectors).
+ *
+ * @param handle the native handle of the index
+ */
+ static native void indexReset(long handle);
+
+ // ==================== Index I/O ====================
+
+ /**
+ * Write an index to a file.
+ *
+ * @param handle the native handle of the index
+ * @param path the file path to write to
+ */
+ static native void indexWriteToFile(long handle, String path);
+
+ /**
+ * Read an index from a file.
+ *
+ * @param path the file path to read from
+ * @return the native handle of the loaded index
+ */
+ static native long indexReadFromFile(String path);
+
+ /**
+ * Serialize an index to a byte array.
+ *
+ * @param handle the native handle of the index
+ * @return the serialized bytes
+ */
+ static native byte[] indexSerialize(long handle);
+
+ /**
+ * Deserialize an index from a byte array.
+ *
+ * @param data the serialized bytes
+ * @return the native handle of the loaded index
+ */
+ static native long indexDeserialize(byte[] data);
+
+ // ==================== Range Search Result ====================
+
+ /**
+ * Destroy a range search result.
+ *
+ * @param handle the native handle of the range search result
+ */
+ static native void rangeSearchResultDestroy(long handle);
+
+ /**
+ * Get the number of results for each query in a range search.
+ *
+ * @param handle the native handle of the range search result
+ * @return array of result counts per query
+ */
+ static native long[] rangeSearchResultGetLimits(long handle);
+
+ /**
+ * Get all labels from a range search result.
+ *
+ * @param handle the native handle of the range search result
+ * @return array of all labels
+ */
+ static native long[] rangeSearchResultGetLabels(long handle);
+
+ /**
+ * Get all distances from a range search result.
+ *
+ * @param handle the native handle of the range search result
+ * @return array of all distances
+ */
+ static native float[] rangeSearchResultGetDistances(long handle);
+
+ // ==================== IVF Index Specific ====================
+
+ /**
+ * Get the number of probe lists for an IVF index.
+ *
+ * @param handle the native handle of the index
+ * @return the number of probe lists (nprobe)
+ */
+ static native int ivfGetNprobe(long handle);
+
+ /**
+ * Set the number of probe lists for an IVF index.
+ *
+ * @param handle the native handle of the index
+ * @param nprobe the number of probe lists
+ */
+ static native void ivfSetNprobe(long handle, int nprobe);
+
+ /**
+ * Get the number of lists (clusters) in an IVF index.
+ *
+ * @param handle the native handle of the index
+ * @return the number of lists
+ */
+ static native int ivfGetNlist(long handle);
+
+ // ==================== HNSW Index Specific ====================
+
+ /**
+ * Get the efSearch parameter of an HNSW index.
+ *
+ * @param handle the native handle of the index
+ * @return the efSearch value
+ */
+ static native int hnswGetEfSearch(long handle);
+
+ /**
+ * Set the efSearch parameter of an HNSW index.
+ *
+ * @param handle the native handle of the index
+ * @param efSearch the efSearch value
+ */
+ static native void hnswSetEfSearch(long handle, int efSearch);
+
+ /**
+ * Get the efConstruction parameter of an HNSW index.
+ *
+ * @param handle the native handle of the index
+ * @return the efConstruction value
+ */
+ static native int hnswGetEfConstruction(long handle);
+
+ // ==================== Utility ====================
+
+ /**
+ * Get the Faiss library version.
+ *
+ * @return the version string
+ */
+ static native String getVersion();
+
+ /**
+ * Set the number of threads for parallel operations.
+ *
+ * @param numThreads the number of threads
+ */
+ static native void setNumThreads(int numThreads);
+
+ /**
+ * Get the number of threads for parallel operations.
+ *
+ * @return the number of threads
+ */
+ static native int getNumThreads();
+}
+
diff --git a/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/FaissVersion.java b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/FaissVersion.java
new file mode 100644
index 000000000000..c6f6fe592724
--- /dev/null
+++ b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/FaissVersion.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Properties;
+
+/**
+ * Reports version details for Paimon Faiss.
+ *
+ * <p>Exposes the version of the Java binding and of the underlying Faiss native library. Both
+ * values are read once, during class initialization, from the classpath resource
+ * {@code /paimon-faiss-version.properties}; each one falls back to {@code "unknown"} when the
+ * resource or the corresponding key is absent.
+ */
+public final class FaissVersion {
+
+    /** Classpath resource that carries the version entries. */
+    private static final String VERSION_RESOURCE = "/paimon-faiss-version.properties";
+
+    /** Value reported when a version entry is not available. */
+    private static final String UNKNOWN = "unknown";
+
+    /** The version of the Java binding. */
+    private static final String JAVA_VERSION;
+
+    /** The version of the native Faiss library. */
+    private static final String NATIVE_VERSION;
+
+    static {
+        Properties versionProps = loadVersionProperties();
+        JAVA_VERSION = versionProps.getProperty("version", UNKNOWN);
+        NATIVE_VERSION = versionProps.getProperty("faiss.version", UNKNOWN);
+    }
+
+    private FaissVersion() {
+        // Utility class
+    }
+
+    /**
+     * Read the version resource from the classpath.
+     *
+     * <p>A missing resource yields an empty property set. An I/O failure while reading is
+     * ignored on purpose, so whatever was loaded up to that point is returned.
+     *
+     * @return the loaded properties, possibly empty
+     */
+    private static Properties loadVersionProperties() {
+        Properties loaded = new Properties();
+        try (InputStream stream = FaissVersion.class.getResourceAsStream(VERSION_RESOURCE)) {
+            if (stream != null) {
+                loaded.load(stream);
+            }
+        } catch (IOException ignored) {
+            // Ignore, use defaults
+        }
+        return loaded;
+    }
+
+    /**
+     * Get the version of the Java binding.
+     *
+     * @return the Java binding version, or {@code "unknown"} if it could not be determined
+     */
+    public static String getJavaVersion() {
+        return JAVA_VERSION;
+    }
+
+    /**
+     * Get the version of the native Faiss library as recorded in the version resource.
+     *
+     * @return the native Faiss version, or {@code "unknown"} if it could not be determined
+     */
+    public static String getNativeVersion() {
+        return NATIVE_VERSION;
+    }
+
+    /**
+     * Get the platform identifier for the current system, as computed by
+     * {@link NativeLibraryLoader#getPlatformIdentifier()}.
+     *
+     * @return the platform identifier
+     */
+    public static String getPlatform() {
+        return NativeLibraryLoader.getPlatformIdentifier();
+    }
+
+    /**
+     * Print version and environment information to standard output.
+     *
+     * @param args command line arguments (ignored)
+     */
+    public static void main(String[] args) {
+        // Lines are emitted one by one, in order, so that a failure while resolving a later
+        // value (for example the platform) still leaves the earlier lines printed.
+        java.io.PrintStream out = System.out;
+        out.println("Paimon Faiss JNI Version Information");
+        out.println("================================");
+        out.println("Java Binding Version: " + getJavaVersion());
+        out.println("Native Faiss Version: " + getNativeVersion());
+        out.println("Platform: " + getPlatform());
+        out.println("Java Version: " + System.getProperty("java.version"));
+        String osName = System.getProperty("os.name");
+        String osVersion = System.getProperty("os.version");
+        out.println("OS: " + osName + " " + osVersion);
+        out.println("Architecture: " + System.getProperty("os.arch"));
+    }
+}
+
diff --git a/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/Index.java b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/Index.java
new file mode 100644
index 000000000000..4aab51f086d3
--- /dev/null
+++ b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/Index.java
@@ -0,0 +1,372 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss;
+
+import java.io.File;
+
+/**
+ * A Faiss index for similarity search.
+ *
+ * <p>This class wraps a native Faiss index and provides methods for adding vectors, searching
+ * for nearest neighbors, and managing the index.
+ *
+ * <p>Index instances must be closed when no longer needed to free native resources. It is
+ * recommended to use try-with-resources:
+ *
+ * <pre>{@code
+ * try (Index index = IndexFactory.createFlat(128)) {
+ *     index.add(vectors);
+ *     SearchResult result = index.search(queries, 10);
+ * }
+ * }</pre>
+ *
+ * <p>Thread Safety: Index instances are NOT thread-safe. External synchronization is required
+ * if an index is accessed from multiple threads.
+ *
+ * @see IndexFactory
+ */
+public class Index implements AutoCloseable {
+
+    /** Native handle to the Faiss index; reset to 0 by {@link #close()}. */
+    private long nativeHandle;
+
+    /** The dimension of vectors in this index. */
+    private final int dimension;
+
+    /** Whether this index has been closed. */
+    private volatile boolean closed = false;
+
+    /**
+     * Create an Index wrapper around a native handle.
+     *
+     * @param nativeHandle the native handle
+     * @param dimension the vector dimension
+     */
+    Index(long nativeHandle, int dimension) {
+        this.nativeHandle = nativeHandle;
+        this.dimension = dimension;
+    }
+
+    /**
+     * Get the dimension of vectors in this index.
+     *
+     * @return the vector dimension
+     */
+    public int getDimension() {
+        return dimension;
+    }
+
+    /**
+     * Get the number of vectors in this index.
+     *
+     * @return the number of vectors
+     * @throws IllegalStateException if the index has been closed
+     */
+    public long getCount() {
+        checkNotClosed();
+        return FaissNative.indexGetCount(nativeHandle);
+    }
+
+    /**
+     * Check if this index is trained.
+     *
+     * <p>Some index types (like IVF) require training before vectors can be added. Flat indexes
+     * are always considered trained.
+     *
+     * @return true if the index is trained
+     * @throws IllegalStateException if the index has been closed
+     */
+    public boolean isTrained() {
+        checkNotClosed();
+        return FaissNative.indexIsTrained(nativeHandle);
+    }
+
+    /**
+     * Get the metric type used by this index.
+     *
+     * @return the metric type
+     * @throws IllegalStateException if the index has been closed
+     */
+    public MetricType getMetricType() {
+        checkNotClosed();
+        return MetricType.fromValue(FaissNative.indexGetMetricType(nativeHandle));
+    }
+
+    /**
+     * Train the index on a set of training vectors.
+     *
+     * <p>This is required for some index types (like IVF) before adding vectors. For flat
+     * indexes, this is a no-op.
+     *
+     * @param vectors the training vectors (n * dimension floats)
+     * @throws IllegalArgumentException if the array length is not a multiple of the dimension
+     * @throws IllegalStateException if the index has been closed
+     */
+    public void train(float[] vectors) {
+        checkNotClosed();
+        long n = countVectors(vectors, "Vector");
+        FaissNative.indexTrain(nativeHandle, n, vectors);
+    }
+
+    /**
+     * Add vectors to the index.
+     *
+     * <p>The vectors are assigned sequential IDs starting from the current count.
+     *
+     * @param vectors the vectors to add (n * dimension floats)
+     * @throws IllegalArgumentException if the array length is not a multiple of the dimension
+     * @throws IllegalStateException if the index has been closed
+     */
+    public void add(float[] vectors) {
+        checkNotClosed();
+        long n = countVectors(vectors, "Vector");
+        FaissNative.indexAdd(nativeHandle, n, vectors);
+    }
+
+    /**
+     * Add a single vector to the index.
+     *
+     * @param vector the vector to add (dimension floats)
+     * @throws IllegalArgumentException if the vector length differs from the dimension
+     * @throws IllegalStateException if the index has been closed
+     */
+    public void addSingle(float[] vector) {
+        checkNotClosed();
+        if (vector.length != dimension) {
+            throw new IllegalArgumentException(
+                    "Vector length must equal dimension " + dimension + ", got " + vector.length);
+        }
+        FaissNative.indexAdd(nativeHandle, 1, vector);
+    }
+
+    /**
+     * Add vectors with explicit IDs to the index.
+     *
+     * <p>Note: Not all index types support this operation. Flat indexes and IndexIDMap wrapped
+     * indexes support it.
+     *
+     * @param vectors the vectors to add (n * dimension floats)
+     * @param ids the IDs for the vectors (n longs)
+     * @throws IllegalArgumentException if the array length is not a multiple of the dimension,
+     *     or the number of IDs differs from the number of vectors
+     * @throws IllegalStateException if the index has been closed
+     */
+    public void addWithIds(float[] vectors, long[] ids) {
+        checkNotClosed();
+        long n = countVectors(vectors, "Vector");
+        if (ids.length != n) {
+            throw new IllegalArgumentException(
+                    "Number of IDs (" + ids.length + ") must match number of vectors (" + n + ")");
+        }
+        FaissNative.indexAddWithIds(nativeHandle, n, vectors, ids);
+    }
+
+    /**
+     * Search for the k nearest neighbors of query vectors.
+     *
+     * @param queries the query vectors (n * dimension floats)
+     * @param k the number of nearest neighbors to find, must be positive
+     * @return the search result containing labels and distances
+     * @throws IllegalArgumentException if the array length is not a multiple of the dimension,
+     *     if {@code k} is not positive, or if {@code n * k} does not fit in an array
+     * @throws IllegalStateException if the index has been closed
+     */
+    public SearchResult search(float[] queries, int k) {
+        checkNotClosed();
+        int n = countVectors(queries, "Query");
+        int resultSize = resultSize(n, k);
+        long[] labels = new long[resultSize];
+        float[] distances = new float[resultSize];
+        FaissNative.indexSearch(nativeHandle, n, queries, k, distances, labels);
+        return new SearchResult(n, k, labels, distances);
+    }
+
+    /**
+     * Search for a single query vector.
+     *
+     * @param query the query vector (dimension floats)
+     * @param k the number of nearest neighbors to find, must be positive
+     * @return the search result
+     * @throws IllegalArgumentException if the query length differs from the dimension or
+     *     {@code k} is not positive
+     * @throws IllegalStateException if the index has been closed
+     */
+    public SearchResult searchSingle(float[] query, int k) {
+        checkNotClosed();
+        if (query.length != dimension) {
+            throw new IllegalArgumentException(
+                    "Query length must equal dimension " + dimension + ", got " + query.length);
+        }
+        int resultSize = resultSize(1, k);
+        long[] labels = new long[resultSize];
+        float[] distances = new float[resultSize];
+        FaissNative.indexSearch(nativeHandle, 1, query, k, distances, labels);
+        return new SearchResult(1, k, labels, distances);
+    }
+
+    /**
+     * Search for all neighbors within a given radius.
+     *
+     * @param queries the query vectors (n * dimension floats)
+     * @param radius the search radius
+     * @return the range search result
+     * @throws IllegalArgumentException if the array length is not a multiple of the dimension
+     * @throws IllegalStateException if the index has been closed
+     */
+    public RangeSearchResult rangeSearch(float[] queries, float radius) {
+        checkNotClosed();
+        int n = countVectors(queries, "Query");
+        long resultHandle = FaissNative.indexRangeSearch(nativeHandle, n, queries, radius);
+        return new RangeSearchResult(resultHandle, n);
+    }
+
+    /**
+     * Remove vectors by their IDs.
+     *
+     * <p>Note: Not all index types support removal. Check Faiss documentation for details on
+     * which index types support this operation.
+     *
+     * @param ids the IDs of vectors to remove
+     * @return the number of vectors actually removed
+     * @throws IllegalStateException if the index has been closed
+     */
+    public long removeIds(long[] ids) {
+        checkNotClosed();
+        return FaissNative.indexRemoveIds(nativeHandle, ids);
+    }
+
+    /**
+     * Reset the index (remove all vectors).
+     *
+     * @throws IllegalStateException if the index has been closed
+     */
+    public void reset() {
+        checkNotClosed();
+        FaissNative.indexReset(nativeHandle);
+    }
+
+    /**
+     * Write the index to a file.
+     *
+     * @param path the file path
+     * @throws IllegalStateException if the index has been closed
+     */
+    public void writeToFile(String path) {
+        checkNotClosed();
+        FaissNative.indexWriteToFile(nativeHandle, path);
+    }
+
+    /**
+     * Write the index to a file.
+     *
+     * @param file the file; its absolute path is used
+     * @throws IllegalStateException if the index has been closed
+     */
+    public void writeToFile(File file) {
+        writeToFile(file.getAbsolutePath());
+    }
+
+    /**
+     * Read an index from a file.
+     *
+     * <p>The dimension of the returned index is queried from the loaded native index.
+     *
+     * @param path the file path
+     * @return the loaded index
+     */
+    public static Index readFromFile(String path) {
+        long handle = FaissNative.indexReadFromFile(path);
+        int dimension = FaissNative.indexGetDimension(handle);
+        return new Index(handle, dimension);
+    }
+
+    /**
+     * Read an index from a file.
+     *
+     * @param file the file; its absolute path is used
+     * @return the loaded index
+     */
+    public static Index readFromFile(File file) {
+        return readFromFile(file.getAbsolutePath());
+    }
+
+    /**
+     * Serialize the index to a byte array.
+     *
+     * @return the serialized bytes
+     * @throws IllegalStateException if the index has been closed
+     */
+    public byte[] serialize() {
+        checkNotClosed();
+        return FaissNative.indexSerialize(nativeHandle);
+    }
+
+    /**
+     * Deserialize an index from a byte array.
+     *
+     * <p>The dimension of the returned index is queried from the deserialized native index.
+     *
+     * @param data the serialized bytes
+     * @return the deserialized index
+     */
+    public static Index deserialize(byte[] data) {
+        long handle = FaissNative.indexDeserialize(data);
+        int dimension = FaissNative.indexGetDimension(handle);
+        return new Index(handle, dimension);
+    }
+
+    /**
+     * Get the native handle.
+     *
+     * <p>This is for internal use only. The closed check prevents the zeroed handle of a closed
+     * index from being handed to native code by helpers such as {@link IndexIVF} and
+     * {@link IndexHNSW}.
+     *
+     * @return the native handle
+     * @throws IllegalStateException if the index has been closed
+     */
+    long getNativeHandle() {
+        checkNotClosed();
+        return nativeHandle;
+    }
+
+    /**
+     * Validate that a flattened vector array holds whole vectors and return how many.
+     *
+     * @param data the flattened array (n * dimension floats)
+     * @param kind the word used at the start of the error message ("Vector" or "Query")
+     * @return the number of vectors contained in {@code data}
+     * @throws IllegalArgumentException if the length is not a multiple of the dimension
+     */
+    private int countVectors(float[] data, String kind) {
+        if (data.length % dimension != 0) {
+            throw new IllegalArgumentException(
+                    kind + " array length must be a multiple of dimension " + dimension);
+        }
+        return data.length / dimension;
+    }
+
+    /**
+     * Compute the size of the label/distance buffers for a search of n queries with k results.
+     *
+     * <p>The product is computed in {@code long} so that an {@code int} overflow cannot silently
+     * produce buffers smaller than the {@code n * k} entries the native search is asked to fill.
+     *
+     * @param n the number of query vectors
+     * @param k the number of neighbors requested per query
+     * @return {@code n * k} as an array size
+     * @throws IllegalArgumentException if {@code k} is not positive or the product exceeds
+     *     {@link Integer#MAX_VALUE}
+     */
+    private static int resultSize(int n, int k) {
+        if (k <= 0) {
+            throw new IllegalArgumentException("k must be positive: " + k);
+        }
+        long total = (long) n * k;
+        if (total > Integer.MAX_VALUE) {
+            throw new IllegalArgumentException(
+                    "Result size " + n + " * " + k + " exceeds the maximum array size");
+        }
+        return (int) total;
+    }
+
+    private void checkNotClosed() {
+        if (closed) {
+            throw new IllegalStateException("Index has been closed");
+        }
+    }
+
+    /**
+     * Release the native index. Calling this method more than once has no further effect.
+     */
+    @Override
+    public void close() {
+        if (!closed) {
+            closed = true;
+            if (nativeHandle != 0) {
+                FaissNative.indexDestroy(nativeHandle);
+                nativeHandle = 0;
+            }
+        }
+    }
+
+    /** Safety net that releases the native index if the owner never called {@link #close()}. */
+    @Override
+    protected void finalize() throws Throwable {
+        try {
+            close();
+        } finally {
+            super.finalize();
+        }
+    }
+
+    @Override
+    public String toString() {
+        if (closed) {
+            return "Index[closed]";
+        }
+        return "Index{"
+                + "dimension=" + dimension
+                + ", count=" + getCount()
+                + ", trained=" + isTrained()
+                + ", metricType=" + getMetricType()
+                + '}';
+    }
+}
+
diff --git a/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/IndexFactory.java b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/IndexFactory.java
new file mode 100644
index 000000000000..123da14d99df
--- /dev/null
+++ b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/IndexFactory.java
@@ -0,0 +1,247 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss;
+
+/**
+ * Factory for creating Faiss indexes.
+ *
+ * <p>This class provides static methods for creating various types of Faiss indexes using
+ * Faiss's index factory syntax.
+ *
+ * <h2>Index Description Syntax</h2>
+ *
+ * <p>The index description string follows Faiss's index factory format:
+ *
+ * <ul>
+ *   <li>{@code "IVF100,Flat"} - IVF index with 100 clusters and flat quantizer
+ *   <li>{@code "IVF100,PQ8"} - IVF index with PQ compression (8 bytes per vector)
+ *   <li>{@code "HNSW32"} - HNSW graph index with 32 neighbors per node
+ *   <li>{@code "HNSW32,Flat"} - HNSW with flat storage
+ *   <li>{@code "PQ16"} - Product quantization with 16 bytes per vector
+ *   <li>{@code "OPQ16,PQ16"} - Optimized PQ with rotation
+ *   <li>{@code "IVF100,PQ16x4"} - IVF with 4-bit PQ
+ *   <li>{@code "IDMap,Flat"} - Flat index with ID mapping support
+ *   <li>{@code "IDMap2,Flat"} - Flat index with ID mapping and removal support
+ * </ul>
+ *
+ * <h2>Preprocessing Options</h2>
+ *
+ * <p>Preprocessing can be added before the main index:
+ *
+ * <ul>
+ *   <li>{@code "PCA64,Flat"} - PCA dimensionality reduction to 64 dims
+ *   <li>{@code "L2norm,Flat"} - L2 normalization before indexing
+ *   <li>{@code "ITQ64,Flat"} - ITQ rotation to 64 dims
+ * </ul>
+ *
+ * <h2>Example Usage</h2>
+ *
+ * <pre>{@code
+ * // Create a flat index for exact search
+ * Index flatIndex = IndexFactory.create(128, "Flat", MetricType.L2);
+ *
+ * // Create an IVF index for approximate search
+ * Index ivfIndex = IndexFactory.create(128, "IVF1000,Flat", MetricType.L2);
+ * ivfIndex.train(trainingVectors); // Training required for IVF
+ *
+ * // Create an HNSW index
+ * Index hnswIndex = IndexFactory.create(128, "HNSW32", MetricType.INNER_PRODUCT);
+ *
+ * // Create a flat index with ID mapping
+ * Index idMapIndex = IndexFactory.create(128, "IDMap,Flat", MetricType.L2);
+ * }</pre>
+ *
+ * @see Index
+ * @see MetricType
+ */
+public final class IndexFactory {
+
+    private IndexFactory() {
+        // Static utility class
+    }
+
+    /**
+     * Create a Faiss index using the index factory.
+     *
+     * @param dimension the dimension of the vectors
+     * @param description the index description string
+     * @param metricType the metric type for similarity computation
+     * @return the created index
+     * @throws IllegalArgumentException if the dimension is not positive, the description is
+     *     null or empty, or the metric type is null
+     */
+    public static Index create(int dimension, String description, MetricType metricType) {
+        if (dimension <= 0) {
+            throw new IllegalArgumentException("Dimension must be positive: " + dimension);
+        }
+        if (description == null || description.isEmpty()) {
+            throw new IllegalArgumentException("Index description cannot be null or empty");
+        }
+        if (metricType == null) {
+            throw new IllegalArgumentException("Metric type cannot be null");
+        }
+
+        long handle = FaissNative.indexFactoryCreate(dimension, description, metricType.getValue());
+        return new Index(handle, dimension);
+    }
+
+    /**
+     * Create a Faiss index with L2 (Euclidean) metric.
+     *
+     * @param dimension the dimension of the vectors
+     * @param description the index description string
+     * @return the created index
+     */
+    public static Index create(int dimension, String description) {
+        return create(dimension, description, MetricType.L2);
+    }
+
+    /**
+     * Create a flat (brute-force) index.
+     *
+     * <p>Flat indexes provide exact search results but have O(n) search complexity. Suitable
+     * for small datasets (up to ~100K vectors).
+     *
+     * @param dimension the dimension of the vectors
+     * @param metricType the metric type
+     * @return the created index
+     */
+    public static Index createFlat(int dimension, MetricType metricType) {
+        return create(dimension, "Flat", metricType);
+    }
+
+    /**
+     * Create a flat index with L2 metric.
+     *
+     * @param dimension the dimension of the vectors
+     * @return the created index
+     */
+    public static Index createFlat(int dimension) {
+        return createFlat(dimension, MetricType.L2);
+    }
+
+    /**
+     * Create a flat index with ID mapping support.
+     *
+     * <p>This allows adding vectors with explicit IDs using {@link Index#addWithIds}.
+     *
+     * @param dimension the dimension of the vectors
+     * @param metricType the metric type
+     * @return the created index
+     */
+    public static Index createFlatWithIds(int dimension, MetricType metricType) {
+        return create(dimension, "IDMap,Flat", metricType);
+    }
+
+    /**
+     * Create an IVF (Inverted File) index.
+     *
+     * <p>IVF indexes partition the vector space into clusters for faster search. They require
+     * training before use.
+     *
+     * @param dimension the dimension of the vectors
+     * @param nlist the number of clusters (typically sqrt(n) to 4*sqrt(n)), must be positive
+     * @param metricType the metric type
+     * @return the created index
+     * @throws IllegalArgumentException if {@code nlist} is not positive
+     */
+    public static Index createIVFFlat(int dimension, int nlist, MetricType metricType) {
+        requirePositive("nlist", nlist);
+        return create(dimension, "IVF" + nlist + ",Flat", metricType);
+    }
+
+    /**
+     * Create an IVF index with product quantization.
+     *
+     * <p>IVF-PQ provides a good balance between search speed, memory usage, and accuracy.
+     *
+     * @param dimension the dimension of the vectors
+     * @param nlist the number of clusters, must be positive
+     * @param m the number of sub-vectors for PQ (positive; dimension must be divisible by m)
+     * @param metricType the metric type
+     * @return the created index
+     * @throws IllegalArgumentException if {@code nlist} or {@code m} is not positive, or the
+     *     dimension is not divisible by {@code m}
+     */
+    public static Index createIVFPQ(int dimension, int nlist, int m, MetricType metricType) {
+        requirePositive("nlist", nlist);
+        requireDivisible(dimension, m);
+        return create(dimension, "IVF" + nlist + ",PQ" + m, metricType);
+    }
+
+    /**
+     * Create an HNSW (Hierarchical Navigable Small World) index.
+     *
+     * <p>HNSW provides excellent search performance with good recall. It does not require
+     * training.
+     *
+     * @param dimension the dimension of the vectors
+     * @param m the number of neighbors in the graph (typically 16-64), must be positive
+     * @param metricType the metric type
+     * @return the created index
+     * @throws IllegalArgumentException if {@code m} is not positive
+     */
+    public static Index createHNSW(int dimension, int m, MetricType metricType) {
+        requirePositive("m", m);
+        return create(dimension, "HNSW" + m, metricType);
+    }
+
+    /**
+     * Create an HNSW index with flat storage.
+     *
+     * @param dimension the dimension of the vectors
+     * @param m the number of neighbors in the graph, must be positive
+     * @param metricType the metric type
+     * @return the created index
+     * @throws IllegalArgumentException if {@code m} is not positive
+     */
+    public static Index createHNSWFlat(int dimension, int m, MetricType metricType) {
+        requirePositive("m", m);
+        return create(dimension, "HNSW" + m + ",Flat", metricType);
+    }
+
+    /**
+     * Create a product quantization index.
+     *
+     * <p>PQ indexes provide significant memory savings at the cost of some accuracy. They
+     * require training.
+     *
+     * @param dimension the dimension of the vectors
+     * @param m the number of sub-vectors (positive; dimension must be divisible by m)
+     * @param metricType the metric type
+     * @return the created index
+     * @throws IllegalArgumentException if {@code m} is not positive or the dimension is not
+     *     divisible by {@code m}
+     */
+    public static Index createPQ(int dimension, int m, MetricType metricType) {
+        requireDivisible(dimension, m);
+        return create(dimension, "PQ" + m, metricType);
+    }
+
+    /**
+     * Create a scalar quantizer index.
+     *
+     * <p>Scalar quantization compresses vectors by quantizing each dimension.
+     *
+     * @param dimension the dimension of the vectors
+     * @param bits the number of bits per dimension (4 or 8)
+     * @param metricType the metric type
+     * @return the created index
+     * @throws IllegalArgumentException if {@code bits} is neither 4 nor 8
+     */
+    public static Index createScalarQuantizer(int dimension, int bits, MetricType metricType) {
+        if (bits != 4 && bits != 8) {
+            throw new IllegalArgumentException("Bits must be 4 or 8, got: " + bits);
+        }
+        return create(dimension, "SQ" + bits, metricType);
+    }
+
+    /** Reject zero or negative values before they are spliced into a description string. */
+    private static void requirePositive(String name, int value) {
+        if (value <= 0) {
+            throw new IllegalArgumentException(name + " must be positive: " + value);
+        }
+    }
+
+    /**
+     * Check that m is positive and divides the dimension. The positivity check comes first so
+     * that {@code m == 0} is reported as an invalid argument instead of failing the modulo
+     * with an ArithmeticException.
+     */
+    private static void requireDivisible(int dimension, int m) {
+        requirePositive("m", m);
+        if (dimension % m != 0) {
+            throw new IllegalArgumentException(
+                    "Dimension " + dimension + " must be divisible by m " + m);
+        }
+    }
+}
+
diff --git a/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/IndexHNSW.java b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/IndexHNSW.java
new file mode 100644
index 000000000000..9e1c26e32b21
--- /dev/null
+++ b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/IndexHNSW.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss;
+
+/**
+ * Utility class for HNSW (Hierarchical Navigable Small World) index operations.
+ *
+ *
HNSW indexes build a graph structure for fast approximate nearest neighbor search.
+ * The key parameters are:
+ *
+ *
+ *
{@code M} - The number of neighbors in the graph. Higher values increase
+ * memory usage and build time but improve search accuracy.
+ *
{@code efConstruction} - The size of the dynamic candidate list during construction.
+ * Higher values increase build time but can improve the graph quality.
+ *
{@code efSearch} - The size of the dynamic candidate list during search.
+ * Higher values increase search accuracy at the cost of speed.
+ *
+ *
+ *
Example usage:
+ *
{@code
+ * Index index = IndexFactory.createHNSW(128, 32, MetricType.L2);
+ * index.add(vectors);
+ *
+ * // Increase efSearch for more accurate results
+ * IndexHNSW.setEfSearch(index, 64);
+ *
+ * SearchResult result = index.search(queries, 10);
+ * }
+ */
+public final class IndexHNSW {
+
+ private IndexHNSW() {
+ // Static utility class
+ }
+
+ /**
+ * Get the efSearch parameter.
+ *
+ *
This controls the size of the dynamic candidate list during search.
+ * Higher values give more accurate results but slower search.
+ *
+ * @param index the HNSW index
+ * @return the current efSearch value
+ * @throws IllegalArgumentException if the index is not an HNSW index
+ */
+ public static int getEfSearch(Index index) {
+ return FaissNative.hnswGetEfSearch(index.getNativeHandle());
+ }
+
+ /**
+ * Set the efSearch parameter.
+ *
+ *
This should be at least k (the number of neighbors requested in search).
+ * Typical values range from 16 to 256. Higher values give more accurate
+ * results but slower search.
+ *
+ * @param index the HNSW index
+ * @param efSearch the efSearch value
+ * @throws IllegalArgumentException if the index is not an HNSW index
+ */
+ public static void setEfSearch(Index index, int efSearch) {
+ if (efSearch <= 0) {
+ throw new IllegalArgumentException("efSearch must be positive: " + efSearch);
+ }
+ FaissNative.hnswSetEfSearch(index.getNativeHandle(), efSearch);
+ }
+
+ /**
+ * Get the efConstruction parameter.
+ *
+ *
This was the size of the dynamic candidate list during index construction.
+ * It cannot be changed after the index is built.
+ *
+ * @param index the HNSW index
+ * @return the efConstruction value
+ * @throws IllegalArgumentException if the index is not an HNSW index
+ */
+ public static int getEfConstruction(Index index) {
+ return FaissNative.hnswGetEfConstruction(index.getNativeHandle());
+ }
+}
+
diff --git a/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/IndexIVF.java b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/IndexIVF.java
new file mode 100644
index 000000000000..01fe92e05dc8
--- /dev/null
+++ b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/IndexIVF.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss;
+
+/**
+ * Utility class for IVF (Inverted File) index operations.
+ *
+ *
IVF indexes partition the vector space into clusters (cells) and only
+ * search a subset of clusters during search. The {@code nprobe} parameter
+ * controls how many clusters to search, trading off between speed and accuracy.
+ *
+ *
Example usage:
+ *
{@code
+ * Index index = IndexFactory.createIVFFlat(128, 1000, MetricType.L2);
+ * index.train(trainingVectors);
+ * index.add(vectors);
+ *
+ * // Set number of clusters to probe during search
+ * IndexIVF.setNprobe(index, 10); // Search 10 out of 1000 clusters
+ *
+ * SearchResult result = index.search(queries, 10);
+ * }
+ */
+public final class IndexIVF {
+
+ private IndexIVF() {
+ // Static utility class
+ }
+
+ /**
+ * Get the number of clusters to probe during search (nprobe).
+ *
+ * @param index the IVF index
+ * @return the current nprobe value
+ * @throws IllegalArgumentException if the index is not an IVF index
+ */
+ public static int getNprobe(Index index) {
+ return FaissNative.ivfGetNprobe(index.getNativeHandle());
+ }
+
+ /**
+ * Set the number of clusters to probe during search (nprobe).
+ *
+ *
Higher values increase accuracy but decrease search speed.
+ * A good starting point is 1-10% of the total number of clusters.
+ *
+ * @param index the IVF index
+ * @param nprobe the number of clusters to probe
+ * @throws IllegalArgumentException if the index is not an IVF index
+ */
+ public static void setNprobe(Index index, int nprobe) {
+ if (nprobe <= 0) {
+ throw new IllegalArgumentException("nprobe must be positive: " + nprobe);
+ }
+ FaissNative.ivfSetNprobe(index.getNativeHandle(), nprobe);
+ }
+
+ /**
+ * Get the total number of clusters (nlist) in the index.
+ *
+ * @param index the IVF index
+ * @return the number of clusters
+ * @throws IllegalArgumentException if the index is not an IVF index
+ */
+ public static int getNlist(Index index) {
+ return FaissNative.ivfGetNlist(index.getNativeHandle());
+ }
+}
+
diff --git a/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/MetricType.java b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/MetricType.java
new file mode 100644
index 000000000000..6fdb21a4d75e
--- /dev/null
+++ b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/MetricType.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss;
+
+/**
+ * Metric type for similarity search.
+ *
+ * <p>Faiss supports two main metric types for measuring similarity between vectors:
+ *
+ * <ul>
+ *   <li>{@link #L2} - Euclidean distance (L2 norm). Smaller values indicate more similar vectors.
+ *   <li>{@link #INNER_PRODUCT} - Inner product (dot product). Larger values indicate more similar vectors.
+ * </ul>
+ *
The inner product between two vectors is computed as:
+ * {@code sum(a[i] * b[i])}
+ *
+ *
Larger values indicate more similar vectors. For normalized vectors,
+ * this is equivalent to cosine similarity.
+ */
+ INNER_PRODUCT(1);
+
+ /**
+ * Numeric value of this metric type. It is what {@link #getValue()} returns and what
+ * {@link #fromValue(int)} matches against.
+ */
+ private final int value;
+
+ /**
+ * Create a metric type constant bound to the given numeric value.
+ *
+ * @param value the numeric value of this metric type
+ */
+ MetricType(int value) {
+ this.value = value;
+ }
+
+ /**
+  * Return the numeric value this metric type was declared with.
+  *
+  * @return the numeric value
+  */
+ public int getValue() {
+     return this.value;
+ }
+
+ /**
+ * Get a MetricType from its numeric value.
+ *
+ * @param value the numeric value
+ * @return the corresponding MetricType
+ * @throws IllegalArgumentException if the value is not valid
+ */
+ public static MetricType fromValue(int value) {
+ for (MetricType type : values()) {
+ if (type.value == value) {
+ return type;
+ }
+ }
+ throw new IllegalArgumentException("Unknown metric type value: " + value);
+ }
+}
+
diff --git a/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/NativeLibraryLoader.java b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/NativeLibraryLoader.java
new file mode 100644
index 000000000000..d53384449d2b
--- /dev/null
+++ b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/NativeLibraryLoader.java
@@ -0,0 +1,303 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Locale;
+
+/**
+ * Native library loader for Faiss JNI.
+ *
+ * <p>This class is responsible for loading the native Faiss library from the JAR file or system
+ * path. It follows a similar pattern to RocksDB's native library loading mechanism.
+ *
+ * <p>The loader attempts to load the library in the following order:
+ *
+ * <ol>
+ *   <li>From the path specified by the {@code paimon.faiss.lib.path} system property
+ *   <li>From the system library path using {@code System.loadLibrary}
+ *   <li>From the JAR file bundled with the distribution
+ * </ol>
+ */
+public class NativeLibraryLoader {
+ private static final Logger LOG = LoggerFactory.getLogger(NativeLibraryLoader.class);
+
+ /** The name of the native library. */
+ private static final String JNI_LIBRARY_NAME = "paimon_faiss_jni";
+
+ /** System property to specify a custom path to the native library. */
+ private static final String LIBRARY_PATH_PROPERTY = "paimon.faiss.lib.path";
+
+ /**
+ * Dependency libraries that need to be loaded before the main JNI library. These are bundled in
+ * the JAR when the main library cannot be statically linked.
+ *
+ *
Order matters! Libraries must be loaded before the libraries that depend on them.
+ */
+ private static final String[] DEPENDENCY_LIBRARIES = {
+ // GCC runtime libraries (must be loaded first as others depend on them)
+ "libgcc_s.so.1",
+ // Quadmath library (needed by gfortran)
+ "libquadmath.so.0",
+ // Fortran runtime (needed by OpenBLAS) - try multiple versions
+ "libgfortran.so.5",
+ "libgfortran.so.4",
+ "libgfortran.so.3",
+ // OpenMP runtime
+ "libgomp.so.1",
+ // BLAS/LAPACK
+ "libblas.so.3",
+ "liblapack.so.3",
+ // OpenBLAS for FAISS (load last as it depends on above)
+ "libopenblas.so.0",
+ };
+
+ /** Whether the native library has been loaded. */
+ private static volatile boolean libraryLoaded = false;
+
+ /** Lock for thread-safe library loading. */
+ private static final Object LOAD_LOCK = new Object();
+
+ /** Temporary directory for extracting native libraries. */
+ private static Path tempDir;
+
+ private NativeLibraryLoader() {
+ // Utility class, no instantiation
+ }
+
+ /**
+ * Load the native library.
+ *
+ * @throws FaissException if the library cannot be loaded
+ */
+ public static void load() throws FaissException {
+ if (libraryLoaded) {
+ return;
+ }
+
+ synchronized (LOAD_LOCK) {
+ if (libraryLoaded) {
+ return;
+ }
+
+ try {
+ loadNativeLibrary();
+ libraryLoaded = true;
+ LOG.info("Faiss native library loaded successfully");
+ } catch (Exception e) {
+ throw new FaissException("Failed to load Faiss native library", e);
+ }
+ }
+ }
+
+ /**
+ * Check if the native library has been loaded.
+ *
+ * @return true if the library is loaded
+ */
+ public static boolean isLoaded() {
+ return libraryLoaded;
+ }
+
+ private static void loadNativeLibrary() throws IOException {
+ // First, try loading from custom path
+ String customPath = System.getProperty(LIBRARY_PATH_PROPERTY);
+ if (customPath != null && !customPath.isEmpty()) {
+ File customLibrary = new File(customPath);
+ if (customLibrary.exists()) {
+ System.load(customLibrary.getAbsolutePath());
+ LOG.info("Loaded Faiss native library from custom path: {}", customPath);
+ return;
+ } else {
+ LOG.warn("Custom library path specified but file not found: {}", customPath);
+ }
+ }
+
+ // Second, try loading from system library path
+ try {
+ System.loadLibrary(JNI_LIBRARY_NAME);
+ LOG.info("Loaded Faiss native library from system path");
+ return;
+ } catch (UnsatisfiedLinkError e) {
+ LOG.debug("Could not load from system path, trying bundled library: {}", e.getMessage());
+ }
+
+ // Third, try loading from JAR
+ loadFromJar();
+ }
+
+ private static void loadFromJar() throws IOException {
+ String libraryPath = getLibraryResourcePath();
+ LOG.debug("Attempting to load native library from JAR: {}", libraryPath);
+
+ try (InputStream is = NativeLibraryLoader.class.getResourceAsStream(libraryPath)) {
+ if (is == null) {
+ throw new IOException(
+ "Native library not found in JAR: "
+ + libraryPath
+ + ". "
+ + "Make sure you are using the correct JAR for your platform ("
+ + getPlatformIdentifier()
+ + ")");
+ }
+
+ // Create temp directory if needed
+ if (tempDir == null) {
+ tempDir = Files.createTempDirectory("paimon-faiss-native");
+ tempDir.toFile().deleteOnExit();
+ }
+
+ // First, extract and load dependency libraries (if bundled)
+ loadDependencyLibraries();
+
+ // Extract native library to temp file
+ String fileName = System.mapLibraryName(JNI_LIBRARY_NAME);
+ File tempFile = new File(tempDir.toFile(), fileName);
+ tempFile.deleteOnExit();
+
+ try (OutputStream os = new FileOutputStream(tempFile)) {
+ byte[] buffer = new byte[8192];
+ int bytesRead;
+ while ((bytesRead = is.read(buffer)) != -1) {
+ os.write(buffer, 0, bytesRead);
+ }
+ }
+
+ // Make the file executable (for Unix-like systems)
+ if (!tempFile.setExecutable(true)) {
+ LOG.warn("Could not set executable permission on native library");
+ }
+
+ // Load the library
+ System.load(tempFile.getAbsolutePath());
+ LOG.info("Loaded Faiss native library from JAR: {}", libraryPath);
+ }
+ }
+
+ /**
+ * Extract and load dependency libraries that are bundled in the JAR. These must be loaded
+ * before the main JNI library to satisfy its dynamic linking requirements.
+ */
+ private static void loadDependencyLibraries() {
+ String os = getOsName();
+ String arch = getArchName();
+
+ for (String depLib : DEPENDENCY_LIBRARIES) {
+ String resourcePath = "/" + os + "/" + arch + "/" + depLib;
+ try (InputStream is = NativeLibraryLoader.class.getResourceAsStream(resourcePath)) {
+ if (is == null) {
+ LOG.debug("Dependency library not bundled: {}", depLib);
+ continue;
+ }
+
+ File tempFile = new File(tempDir.toFile(), depLib);
+ tempFile.deleteOnExit();
+
+ try (OutputStream fos = new FileOutputStream(tempFile)) {
+ byte[] buffer = new byte[8192];
+ int bytesRead;
+ while ((bytesRead = is.read(buffer)) != -1) {
+ fos.write(buffer, 0, bytesRead);
+ }
+ }
+
+ if (!tempFile.setExecutable(true)) {
+ LOG.warn("Could not set executable permission on: {}", depLib);
+ }
+
+ // Load the dependency library
+ System.load(tempFile.getAbsolutePath());
+ LOG.info("Loaded bundled dependency library: {}", depLib);
+ } catch (UnsatisfiedLinkError e) {
+ // Library might already be loaded or not needed
+ LOG.debug("Could not load dependency {}: {}", depLib, e.getMessage());
+ } catch (IOException e) {
+ LOG.debug("Could not extract dependency {}: {}", depLib, e.getMessage());
+ }
+ }
+ }
+
+ private static String getLibraryResourcePath() {
+ String os = getOsName();
+ String arch = getArchName();
+ String libraryFileName = System.mapLibraryName(JNI_LIBRARY_NAME);
+ return "/" + os + "/" + arch + "/" + libraryFileName;
+ }
+
+ /**
+ * Get the platform identifier for the current system.
+ *
+ * @return platform identifier string (e.g., "linux/amd64", "darwin/aarch64")
+ */
+ static String getPlatformIdentifier() {
+ return getOsName() + "/" + getArchName();
+ }
+
+ /**
+ * Get the normalized OS name for the current system.
+ *
+ * @return OS name string (e.g., "linux", "darwin")
+ */
+ private static String getOsName() {
+ String osName = System.getProperty("os.name").toLowerCase();
+
+ if (osName.contains("linux")) {
+ return "linux";
+ } else if (osName.contains("mac") || osName.contains("darwin")) {
+ return "darwin";
+ } else {
+ throw new UnsupportedOperationException(
+ "Unsupported operating system: " + osName + ". Only Linux and macOS are supported.");
+ }
+ }
+
+ /**
+ * Get the normalized architecture name for the current system.
+ *
+ * @return architecture name string (e.g., "amd64", "aarch64")
+ */
+ private static String getArchName() {
+ String osArch = System.getProperty("os.arch").toLowerCase();
+
+ if (osArch.equals("amd64") || osArch.equals("x86_64")) {
+ return "amd64";
+ } else if (osArch.equals("aarch64") || osArch.equals("arm64")) {
+ return "aarch64";
+ } else {
+ throw new UnsupportedOperationException("Unsupported architecture: " + osArch);
+ }
+ }
+
+ /**
+ * Get the name of the JNI library.
+ *
+ * @return the library name
+ */
+ public static String getLibraryName() {
+ return JNI_LIBRARY_NAME;
+ }
+}
+
diff --git a/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/RangeSearchResult.java b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/RangeSearchResult.java
new file mode 100644
index 000000000000..5044e276cdd7
--- /dev/null
+++ b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/RangeSearchResult.java
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss;
+
+import java.util.Arrays;
+
+/**
+ * Result of a range search operation.
+ *
+ * <p>Unlike k-NN search, which returns a fixed number of neighbors per query, range search
+ * returns all neighbors within a given radius, so the number of results can vary per query.
+ */
+public class RangeSearchResult implements AutoCloseable {
+
+    // Handle to the native result object; 0 once close() has released it.
+    private long nativeHandle;
+    private final int numQueries;
+    // Offsets into labels/distances: results of query i occupy [limits[i], limits[i + 1]).
+    private long[] limits;
+    private long[] labels;
+    private float[] distances;
+
+    /**
+     * Create a new RangeSearchResult from a native handle.
+     *
+     * @param nativeHandle the native handle
+     * @param numQueries the number of query vectors
+     */
+    RangeSearchResult(long nativeHandle, int numQueries) {
+        this.nativeHandle = nativeHandle;
+        this.numQueries = numQueries;
+    }
+
+    /**
+     * Get the number of query vectors.
+     *
+     * @return the number of queries
+     */
+    public int getNumQueries() {
+        return numQueries;
+    }
+
+    /**
+     * Get the number of results for a specific query.
+     *
+     * @param queryIndex the query index
+     * @return the number of results
+     * @throws IndexOutOfBoundsException if {@code queryIndex} is not in {@code [0, numQueries)}
+     * @throws IllegalStateException if the offsets were never fetched and the native result is no
+     *     longer available
+     */
+    public long getResultCount(int queryIndex) {
+        // Validate first so an invalid index never triggers a native call.
+        checkQueryIndex(queryIndex);
+        ensureLimitsLoaded();
+        return limits[queryIndex + 1] - limits[queryIndex];
+    }
+
+    /**
+     * Get the total number of results across all queries.
+     *
+     * @return the total number of results
+     * @throws IllegalStateException if the offsets were never fetched and the native result is no
+     *     longer available
+     */
+    public long getTotalResultCount() {
+        ensureLimitsLoaded();
+        return limits[numQueries];
+    }
+
+    /**
+     * Get the labels for a specific query.
+     *
+     * @param queryIndex the query index
+     * @return a copy of the labels for this query
+     * @throws IndexOutOfBoundsException if {@code queryIndex} is not in {@code [0, numQueries)}
+     * @throws IllegalStateException if the data was never fetched and the native result is no
+     *     longer available
+     */
+    public long[] getLabelsForQuery(int queryIndex) {
+        checkQueryIndex(queryIndex);
+        ensureFullyLoaded();
+        int start = (int) limits[queryIndex];
+        int end = (int) limits[queryIndex + 1];
+        return Arrays.copyOfRange(labels, start, end);
+    }
+
+    /**
+     * Get the distances for a specific query.
+     *
+     * @param queryIndex the query index
+     * @return a copy of the distances for this query
+     * @throws IndexOutOfBoundsException if {@code queryIndex} is not in {@code [0, numQueries)}
+     * @throws IllegalStateException if the data was never fetched and the native result is no
+     *     longer available
+     */
+    public float[] getDistancesForQuery(int queryIndex) {
+        checkQueryIndex(queryIndex);
+        ensureFullyLoaded();
+        int start = (int) limits[queryIndex];
+        int end = (int) limits[queryIndex + 1];
+        return Arrays.copyOfRange(distances, start, end);
+    }
+
+    /**
+     * Get all labels as a flat array. The returned array is the internal cache, not a copy.
+     *
+     * @return all labels
+     * @throws IllegalStateException if the data was never fetched and the native result is no
+     *     longer available
+     */
+    public long[] getAllLabels() {
+        ensureFullyLoaded();
+        return labels;
+    }
+
+    /**
+     * Get all distances as a flat array. The returned array is the internal cache, not a copy.
+     *
+     * @return all distances
+     * @throws IllegalStateException if the data was never fetched and the native result is no
+     *     longer available
+     */
+    public float[] getAllDistances() {
+        ensureFullyLoaded();
+        return distances;
+    }
+
+    /** Reject query indexes outside {@code [0, numQueries)}. */
+    private void checkQueryIndex(int queryIndex) {
+        if (queryIndex < 0 || queryIndex >= numQueries) {
+            throw new IndexOutOfBoundsException("Query index out of bounds: " + queryIndex);
+        }
+    }
+
+    /**
+     * Return the native handle, failing with a descriptive error instead of letting callers hit a
+     * NullPointerException on data that can no longer be fetched.
+     */
+    private long requireOpenHandle() {
+        if (nativeHandle == 0) {
+            throw new IllegalStateException(
+                    "Range search result data is unavailable: the native result has been closed");
+        }
+        return nativeHandle;
+    }
+
+    /** Fetch the per-query offsets from the native result on first use. */
+    private void ensureLimitsLoaded() {
+        if (limits == null) {
+            limits = FaissNative.rangeSearchResultGetLimits(requireOpenHandle());
+        }
+    }
+
+    /** Fetch offsets, labels and distances from the native result on first use. */
+    private void ensureFullyLoaded() {
+        ensureLimitsLoaded();
+        // Check each array separately: if fetching distances fails after labels were cached, a
+        // later call must still retry distances rather than treat the result as fully loaded.
+        if (labels == null) {
+            labels = FaissNative.rangeSearchResultGetLabels(requireOpenHandle());
+        }
+        if (distances == null) {
+            distances = FaissNative.rangeSearchResultGetDistances(requireOpenHandle());
+        }
+    }
+
+    /**
+     * Release the native result. Safe to call more than once; data already fetched into Java
+     * arrays stays readable afterwards.
+     */
+    @Override
+    public void close() {
+        if (nativeHandle != 0) {
+            FaissNative.rangeSearchResultDestroy(nativeHandle);
+            // Zero the handle so a repeated close() does not destroy the native object twice.
+            nativeHandle = 0;
+        }
+    }
+
+    /** Safety net that releases the native result if the caller never invoked {@link #close()}. */
+    @Override
+    protected void finalize() throws Throwable {
+        try {
+            close();
+        } finally {
+            super.finalize();
+        }
+    }
+}
+
diff --git a/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/SearchResult.java b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/SearchResult.java
new file mode 100644
index 000000000000..caf51420a968
--- /dev/null
+++ b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/SearchResult.java
@@ -0,0 +1,163 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss;
+
+import java.util.Arrays;
+
+/**
+ * Result of a k-nearest neighbor search operation.
+ *
+ * <p>Contains the labels (IDs) and distances of the k nearest neighbors
+ * for each query vector.
+ */
+public class SearchResult {
+
+ private final int numQueries;
+ private final int k;
+ private final long[] labels;
+ private final float[] distances;
+
+ /**
+ * Create a new SearchResult.
+ *
+ * @param numQueries the number of query vectors
+ * @param k the number of neighbors per query
+ * @param labels the neighbor labels (numQueries * k)
+ * @param distances the distances to neighbors (numQueries * k)
+ */
+ public SearchResult(int numQueries, int k, long[] labels, float[] distances) {
+ this.numQueries = numQueries;
+ this.k = k;
+ this.labels = labels;
+ this.distances = distances;
+ }
+
+ /**
+ * Get the number of query vectors.
+ *
+ * @return the number of queries
+ */
+ public int getNumQueries() {
+ return numQueries;
+ }
+
+ /**
+ * Get the number of neighbors per query.
+ *
+ * @return k value
+ */
+ public int getK() {
+ return k;
+ }
+
+ /**
+ * Get all labels as a flat array.
+ *
+ *
The array is organized as: [query0_neighbor0, query0_neighbor1, ..., query1_neighbor0, ...]
+ *
+ * @return the labels array
+ */
+ public long[] getLabels() {
+ return labels;
+ }
+
+ /**
+ * Get all distances as a flat array.
+ *
+ *
The array is organized as: [query0_dist0, query0_dist1, ..., query1_dist0, ...]
+ *
+ * @return the distances array
+ */
+ public float[] getDistances() {
+ return distances;
+ }
+
+ /**
+ * Get the labels for a specific query.
+ *
+ * @param queryIndex the query index
+ * @return the labels for this query
+ */
+ public long[] getLabelsForQuery(int queryIndex) {
+ if (queryIndex < 0 || queryIndex >= numQueries) {
+ throw new IndexOutOfBoundsException("Query index out of bounds: " + queryIndex);
+ }
+ int start = queryIndex * k;
+ return Arrays.copyOfRange(labels, start, start + k);
+ }
+
+ /**
+ * Get the distances for a specific query.
+ *
+ * @param queryIndex the query index
+ * @return the distances for this query
+ */
+ public float[] getDistancesForQuery(int queryIndex) {
+ if (queryIndex < 0 || queryIndex >= numQueries) {
+ throw new IndexOutOfBoundsException("Query index out of bounds: " + queryIndex);
+ }
+ int start = queryIndex * k;
+ return Arrays.copyOfRange(distances, start, start + k);
+ }
+
+ /**
+ * Get the label of a specific neighbor for a specific query.
+ *
+ * @param queryIndex the query index
+ * @param neighborIndex the neighbor index (0 = closest)
+ * @return the label
+ */
+ public long getLabel(int queryIndex, int neighborIndex) {
+ if (queryIndex < 0 || queryIndex >= numQueries) {
+ throw new IndexOutOfBoundsException("Query index out of bounds: " + queryIndex);
+ }
+ if (neighborIndex < 0 || neighborIndex >= k) {
+ throw new IndexOutOfBoundsException("Neighbor index out of bounds: " + neighborIndex);
+ }
+ return labels[queryIndex * k + neighborIndex];
+ }
+
+ /**
+ * Get the distance of a specific neighbor for a specific query.
+ *
+ * @param queryIndex the query index
+ * @param neighborIndex the neighbor index (0 = closest)
+ * @return the distance
+ */
+ public float getDistance(int queryIndex, int neighborIndex) {
+ if (queryIndex < 0 || queryIndex >= numQueries) {
+ throw new IndexOutOfBoundsException("Query index out of bounds: " + queryIndex);
+ }
+ if (neighborIndex < 0 || neighborIndex >= k) {
+ throw new IndexOutOfBoundsException("Neighbor index out of bounds: " + neighborIndex);
+ }
+ return distances[queryIndex * k + neighborIndex];
+ }
+
+ @Override
+ public String toString() {
+ return "SearchResult{" +
+ "numQueries=" + numQueries +
+ ", k=" + k +
+ ", labels=" + Arrays.toString(labels) +
+ ", distances=" + Arrays.toString(distances) +
+ '}';
+ }
+}
+
diff --git a/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/example/BasicExample.java b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/example/BasicExample.java
new file mode 100644
index 000000000000..ad5c60fe9f87
--- /dev/null
+++ b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/example/BasicExample.java
@@ -0,0 +1,189 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss.example;
+
+import org.apache.paimon.faiss.Faiss;
+import org.apache.paimon.faiss.Index;
+import org.apache.paimon.faiss.IndexFactory;
+import org.apache.paimon.faiss.IndexHNSW;
+import org.apache.paimon.faiss.MetricType;
+import org.apache.paimon.faiss.SearchResult;
+
+import java.util.Random;
+
+/**
+ * Basic example demonstrating the usage of Paimon Faiss.
+ *
+ * <p>This example shows how to:
+ *
+ * <ul>
+ *   <li>Create different types of indexes
+ *   <li>Add vectors to an index
+ *   <li>Search for nearest neighbors
+ *   <li>Serialize and deserialize indexes
+ * </ul>
+ */
+public class BasicExample {
+
+ private static final int DIMENSION = 128;
+ private static final int NUM_VECTORS = 10000;
+ private static final int K = 5;
+
+ public static void main(String[] args) {
+ System.out.println("Paimon Faiss JNI Basic Example");
+ System.out.println("==========================");
+ System.out.println("Faiss Version: " + Faiss.getVersion());
+ System.out.println();
+
+ // Generate random data
+ Random random = new Random(42);
+ float[] database = generateRandomVectors(random, NUM_VECTORS, DIMENSION);
+ float[] query = generateRandomVectors(random, 1, DIMENSION);
+
+ // Example 1: Flat Index (exact search)
+ System.out.println("Example 1: Flat Index (Exact Search)");
+ System.out.println("-------------------------------------");
+ flatIndexExample(database, query);
+ System.out.println();
+
+ // Example 2: HNSW Index (approximate search)
+ System.out.println("Example 2: HNSW Index (Approximate Search)");
+ System.out.println("-------------------------------------------");
+ hnswIndexExample(database, query);
+ System.out.println();
+
+ // Example 3: Index with custom IDs
+ System.out.println("Example 3: Index with Custom IDs");
+ System.out.println("---------------------------------");
+ customIdsExample(database, query);
+ System.out.println();
+
+ // Example 4: Index serialization
+ System.out.println("Example 4: Index Serialization");
+ System.out.println("-------------------------------");
+ serializationExample(database, query);
+ }
+
+ private static void flatIndexExample(float[] database, float[] query) {
+ try (Index index = IndexFactory.createFlat(DIMENSION, MetricType.L2)) {
+ System.out.println("Created flat index with dimension: " + index.getDimension());
+
+ // Add vectors
+ long startTime = System.currentTimeMillis();
+ index.add(database);
+ long addTime = System.currentTimeMillis() - startTime;
+ System.out.println("Added " + index.getCount() + " vectors in " + addTime + " ms");
+
+ // Search
+ startTime = System.currentTimeMillis();
+ SearchResult result = index.searchSingle(query, K);
+ long searchTime = System.currentTimeMillis() - startTime;
+ System.out.println("Search completed in " + searchTime + " ms");
+
+ // Print results
+ System.out.println("Top " + K + " results:");
+ for (int i = 0; i < K; i++) {
+ System.out.printf(" %d: id=%d, distance=%.4f%n",
+ i + 1, result.getLabel(0, i), result.getDistance(0, i));
+ }
+ }
+ }
+
+ private static void hnswIndexExample(float[] database, float[] query) {
+ try (Index index = IndexFactory.createHNSW(DIMENSION, 32, MetricType.L2)) {
+ System.out.println("Created HNSW index");
+
+ // Add vectors
+ long startTime = System.currentTimeMillis();
+ index.add(database);
+ long addTime = System.currentTimeMillis() - startTime;
+ System.out.println("Added " + index.getCount() + " vectors in " + addTime + " ms");
+
+ // Configure search parameters
+ IndexHNSW.setEfSearch(index, 64);
+ System.out.println("Set efSearch to 64");
+
+ // Search
+ startTime = System.currentTimeMillis();
+ SearchResult result = index.searchSingle(query, K);
+ long searchTime = System.currentTimeMillis() - startTime;
+ System.out.println("Search completed in " + searchTime + " ms");
+
+ // Print results
+ System.out.println("Top " + K + " results:");
+ for (int i = 0; i < K; i++) {
+ System.out.printf(" %d: id=%d, distance=%.4f%n",
+ i + 1, result.getLabel(0, i), result.getDistance(0, i));
+ }
+ }
+ }
+
+ private static void customIdsExample(float[] database, float[] query) {
+ try (Index index = IndexFactory.createFlatWithIds(DIMENSION, MetricType.L2)) {
+ // Create custom IDs
+ long[] ids = new long[NUM_VECTORS];
+ for (int i = 0; i < NUM_VECTORS; i++) {
+ ids[i] = 1000000L + i; // Start from 1,000,000
+ }
+
+ // Add vectors with custom IDs
+ index.addWithIds(database, ids);
+ System.out.println("Added " + index.getCount() + " vectors with custom IDs");
+
+ // Search
+ SearchResult result = index.searchSingle(query, K);
+
+ // Print results (should show custom IDs)
+ System.out.println("Top " + K + " results (with custom IDs):");
+ for (int i = 0; i < K; i++) {
+ System.out.printf(" %d: id=%d, distance=%.4f%n",
+ i + 1, result.getLabel(0, i), result.getDistance(0, i));
+ }
+ }
+ }
+
+ private static void serializationExample(float[] database, float[] query) {
+ byte[] serialized;
+
+ // Create and serialize index
+ try (Index index = IndexFactory.createFlat(DIMENSION, MetricType.L2)) {
+ index.add(database);
+ serialized = index.serialize();
+ System.out.println("Serialized index to " + serialized.length + " bytes");
+ }
+
+ // Deserialize and search
+ try (Index index = Index.deserialize(serialized)) {
+ System.out.println("Deserialized index with " + index.getCount() + " vectors");
+
+ SearchResult result = index.searchSingle(query, K);
+ System.out.println("Search on deserialized index:");
+ System.out.printf(" Top result: id=%d, distance=%.4f%n",
+ result.getLabel(0, 0), result.getDistance(0, 0));
+ }
+ }
+
+ private static float[] generateRandomVectors(Random random, int n, int d) {
+ float[] vectors = new float[n * d];
+ for (int i = 0; i < vectors.length; i++) {
+ vectors[i] = random.nextFloat();
+ }
+ return vectors;
+ }
+}
+
diff --git a/paimon-faiss-jni/src/main/native/CMakeLists.txt b/paimon-faiss-jni/src/main/native/CMakeLists.txt
new file mode 100644
index 000000000000..73aea79ff444
--- /dev/null
+++ b/paimon-faiss-jni/src/main/native/CMakeLists.txt
@@ -0,0 +1,445 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+cmake_minimum_required(VERSION 3.14)
+project(paimon_faiss_jni VERSION 0.1.0 LANGUAGES CXX)
+
+# Compile as C++17 (no fallback to an older standard) and as
+# position-independent code.
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+
+# Options
+option(FAISS_ENABLE_GPU "Build with GPU support" OFF)
+# FAISS_OPT_LEVEL is a multi-valued string that is compared with STREQUAL further
+# down. option() only declares boolean ON/OFF switches, so the value is declared as
+# a STRING cache entry instead. -DFAISS_OPT_LEVEL=<value> on the command line keeps
+# working as before.
+set(FAISS_OPT_LEVEL "generic" CACHE STRING "Optimization level (generic, avx2, avx512)")
+set_property(CACHE FAISS_OPT_LEVEL PROPERTY STRINGS generic avx2 avx512)
+option(BUILD_FAT_LIB "Build fat library with all dependencies statically linked" ON)
+
+# Find JNI
+find_package(JNI REQUIRED)
+include_directories(${JNI_INCLUDE_DIRS})
+
+# Find OpenMP (with special handling for macOS)
+# Result of this section: either the OpenMP::OpenMP_CXX imported target exists
+# or OpenMP_FOUND is set. On non-macOS platforms a missing OpenMP is a hard
+# error (REQUIRED); on macOS it only produces the warnings at the end.
+if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
+ # macOS requires special handling for OpenMP
+ # First try to find libomp from Homebrew
+ execute_process(
+ COMMAND brew --prefix libomp
+ OUTPUT_VARIABLE HOMEBREW_LIBOMP_PREFIX
+ OUTPUT_STRIP_TRAILING_WHITESPACE
+ ERROR_QUIET
+ )
+
+ # An empty HOMEBREW_LIBOMP_PREFIX means the brew command produced no output.
+ if(HOMEBREW_LIBOMP_PREFIX)
+ message(STATUS "Found Homebrew libomp: ${HOMEBREW_LIBOMP_PREFIX}")
+ set(OpenMP_C_FLAGS "-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include")
+ set(OpenMP_CXX_FLAGS "-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include")
+ set(OpenMP_C_LIB_NAMES "omp")
+ set(OpenMP_CXX_LIB_NAMES "omp")
+ set(OpenMP_omp_LIBRARY "${HOMEBREW_LIBOMP_PREFIX}/lib/libomp.dylib")
+
+ # Create imported target manually
+ if(NOT TARGET OpenMP::OpenMP_CXX)
+ add_library(OpenMP::OpenMP_CXX SHARED IMPORTED)
+ set_target_properties(OpenMP::OpenMP_CXX PROPERTIES
+ IMPORTED_LOCATION "${HOMEBREW_LIBOMP_PREFIX}/lib/libomp.dylib"
+ INTERFACE_INCLUDE_DIRECTORIES "${HOMEBREW_LIBOMP_PREFIX}/include"
+ INTERFACE_COMPILE_OPTIONS "-Xpreprocessor;-fopenmp"
+ )
+ endif()
+ # Set by hand because find_package(OpenMP) is not run on this path.
+ set(OpenMP_FOUND TRUE)
+ else()
+ message(WARNING "libomp not found via Homebrew. Trying standard OpenMP detection...")
+ find_package(OpenMP)
+ endif()
+else()
+ find_package(OpenMP REQUIRED)
+endif()
+
+# Only reachable without OpenMP on macOS, where the lookups above are optional.
+if(NOT OpenMP_FOUND AND NOT TARGET OpenMP::OpenMP_CXX)
+ message(WARNING "OpenMP not found. Building without OpenMP support.")
+ message(WARNING "On macOS, install libomp: brew install libomp")
+endif()
+
+# Find Faiss
+# Lookup order: (1) a CMake package config, (2) pkg-config, (3) a manual
+# find_path/find_library search. Configuration stops with FATAL_ERROR when the
+# manual search also fails.
+# For fat lib, prefer static libraries
+if(BUILD_FAT_LIB)
+ message(STATUS "Building fat library - preferring static libraries")
+ # Suffix order decides which file find_library() picks first: ".a" before shared.
+ set(CMAKE_FIND_LIBRARY_SUFFIXES ".a" ".so" ".dylib")
+ set(FAISS_STATIC_PREFERRED TRUE)
+else()
+ set(FAISS_STATIC_PREFERRED FALSE)
+endif()
+
+# First try to find Faiss via CMake config
+find_package(faiss CONFIG QUIET)
+
+if(NOT faiss_FOUND)
+ # Try pkg-config
+ find_package(PkgConfig QUIET)
+ if(PKG_CONFIG_FOUND)
+ pkg_check_modules(FAISS QUIET faiss)
+ endif()
+
+ if(NOT FAISS_FOUND)
+ # Manual search - look in common locations
+ find_path(FAISS_INCLUDE_DIR
+ NAMES faiss/Index.h
+ PATHS
+ /usr/local/include
+ /usr/include
+ ${FAISS_ROOT}/include
+ $ENV{FAISS_ROOT}/include
+ )
+
+ # For fat lib, try to find static library first
+ if(BUILD_FAT_LIB)
+ find_library(FAISS_LIBRARY_STATIC
+ NAMES libfaiss.a faiss_static
+ PATHS
+ /usr/local/lib
+ /usr/lib
+ /usr/local/lib64
+ /usr/lib64
+ ${FAISS_ROOT}/lib
+ ${FAISS_ROOT}/lib64
+ $ENV{FAISS_ROOT}/lib
+ $ENV{FAISS_ROOT}/lib64
+ )
+ if(FAISS_LIBRARY_STATIC)
+ set(FAISS_LIBRARY ${FAISS_LIBRARY_STATIC})
+ message(STATUS "Found Faiss static library: ${FAISS_LIBRARY}")
+ endif()
+ endif()
+
+ # If static not found or not building fat lib, find any library
+ if(NOT FAISS_LIBRARY)
+ find_library(FAISS_LIBRARY
+ NAMES faiss
+ PATHS
+ /usr/local/lib
+ /usr/lib
+ /usr/local/lib64
+ /usr/lib64
+ ${FAISS_ROOT}/lib
+ ${FAISS_ROOT}/lib64
+ $ENV{FAISS_ROOT}/lib
+ $ENV{FAISS_ROOT}/lib64
+ )
+ endif()
+
+ # Publish the manual result under the same FAISS_* names used when linking below.
+ if(FAISS_INCLUDE_DIR AND FAISS_LIBRARY)
+ set(FAISS_FOUND TRUE)
+ set(FAISS_INCLUDE_DIRS ${FAISS_INCLUDE_DIR})
+ set(FAISS_LIBRARIES ${FAISS_LIBRARY})
+ message(STATUS "Found Faiss: ${FAISS_LIBRARY}")
+ else()
+ message(FATAL_ERROR "Faiss not found. Please install Faiss or set FAISS_ROOT environment variable.")
+ endif()
+ endif()
+endif()
+
+# Find BLAS/LAPACK for static linking (Faiss depends on them)
+# This section fills two lists that are consumed when linking the JNI library:
+#   FAISS_STATIC_LIBS - static archives (OpenBLAS, LAPACK, gfortran) that were found
+#   FAISS_EXTRA_LIBS  - libraries linked normally (shared OpenBLAS/BLAS, pthread, dl, m, ...)
+# It also sets BUNDLE_OPENBLAS / BUNDLE_OPENBLAS_PATH when only a shared OpenBLAS exists.
+if(BUILD_FAT_LIB)
+ # Save original suffixes
+ set(_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
+
+ # Force static library search only
+ set(CMAKE_FIND_LIBRARY_SUFFIXES ".a")
+
+ # Try to find OpenBLAS static library
+ find_library(OPENBLAS_STATIC_LIBRARY
+ NAMES openblas openblas_static
+ PATHS
+ /usr/local/lib
+ /usr/lib
+ /usr/local/lib64
+ /usr/lib64
+ /usr/lib/x86_64-linux-gnu
+ /usr/lib/aarch64-linux-gnu
+ ${OPENBLAS_ROOT}/lib
+ $ENV{OPENBLAS_ROOT}/lib
+ NO_DEFAULT_PATH
+ )
+
+ # Also try default paths
+ if(NOT OPENBLAS_STATIC_LIBRARY)
+ find_library(OPENBLAS_STATIC_LIBRARY
+ NAMES openblas openblas_static
+ )
+ endif()
+
+ # The extra "\\.a$" test guards against a cached or user-supplied non-static path.
+ if(OPENBLAS_STATIC_LIBRARY AND OPENBLAS_STATIC_LIBRARY MATCHES "\\.a$")
+ message(STATUS "Found OpenBLAS static library: ${OPENBLAS_STATIC_LIBRARY}")
+ set(OPENBLAS_USE_STATIC TRUE)
+ list(APPEND FAISS_STATIC_LIBS ${OPENBLAS_STATIC_LIBRARY})
+ else()
+ message(STATUS "OpenBLAS static library not found, trying shared library")
+ set(OPENBLAS_USE_STATIC FALSE)
+
+ # Restore suffixes and find shared library
+ set(CMAKE_FIND_LIBRARY_SUFFIXES ${_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES})
+ find_library(OPENBLAS_SHARED_LIBRARY
+ NAMES openblas
+ PATHS
+ /usr/local/lib
+ /usr/lib
+ /usr/local/lib64
+ /usr/lib64
+ /usr/lib/x86_64-linux-gnu
+ /usr/lib/aarch64-linux-gnu
+ ${OPENBLAS_ROOT}/lib
+ $ENV{OPENBLAS_ROOT}/lib
+ )
+ if(OPENBLAS_SHARED_LIBRARY)
+ message(STATUS "Found OpenBLAS shared library: ${OPENBLAS_SHARED_LIBRARY}")
+ list(APPEND FAISS_EXTRA_LIBS ${OPENBLAS_SHARED_LIBRARY})
+ # Mark that we need to bundle this library
+ set(BUNDLE_OPENBLAS TRUE)
+ set(BUNDLE_OPENBLAS_PATH ${OPENBLAS_SHARED_LIBRARY})
+ else()
+ # Try to find any BLAS
+ find_package(BLAS QUIET)
+ if(BLAS_FOUND)
+ list(APPEND FAISS_EXTRA_LIBS ${BLAS_LIBRARIES})
+ message(STATUS "Found BLAS: ${BLAS_LIBRARIES}")
+ endif()
+ endif()
+ endif()
+
+ # Switch back to static-only suffixes (the shared fallback above may have changed them)
+ set(CMAKE_FIND_LIBRARY_SUFFIXES ".a")
+
+ # Find LAPACK static library
+ find_library(LAPACK_STATIC_LIBRARY
+ NAMES lapack
+ PATHS
+ /usr/local/lib
+ /usr/lib
+ /usr/local/lib64
+ /usr/lib64
+ /usr/lib/x86_64-linux-gnu
+ /usr/lib/aarch64-linux-gnu
+ )
+ if(LAPACK_STATIC_LIBRARY AND LAPACK_STATIC_LIBRARY MATCHES "\\.a$")
+ message(STATUS "Found LAPACK static library: ${LAPACK_STATIC_LIBRARY}")
+ list(APPEND FAISS_STATIC_LIBS ${LAPACK_STATIC_LIBRARY})
+ endif()
+
+ # Find gfortran static library (needed by OpenBLAS)
+ find_library(GFORTRAN_STATIC_LIBRARY
+ NAMES gfortran
+ )
+ if(GFORTRAN_STATIC_LIBRARY AND GFORTRAN_STATIC_LIBRARY MATCHES "\\.a$")
+ message(STATUS "Found gfortran static library: ${GFORTRAN_STATIC_LIBRARY}")
+ list(APPEND FAISS_STATIC_LIBS ${GFORTRAN_STATIC_LIBRARY})
+ endif()
+
+ # Restore original suffixes
+ set(CMAKE_FIND_LIBRARY_SUFFIXES ${_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES})
+
+ # On Linux, we may need pthread, dl, and m (these are typically dynamically linked)
+ if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+ list(APPEND FAISS_EXTRA_LIBS pthread dl m)
+
+ # Try to find gfortran shared if static not found
+ if(NOT GFORTRAN_STATIC_LIBRARY)
+ find_library(GFORTRAN_LIBRARY gfortran)
+ if(GFORTRAN_LIBRARY)
+ list(APPEND FAISS_EXTRA_LIBS ${GFORTRAN_LIBRARY})
+ endif()
+ endif()
+ endif()
+endif()
+
+# Platform detection - results are published as PLATFORM_OS, PLATFORM_ARCH and
+# PLATFORM_DIR ("{os}/{arch}").
+
+# Step 1: map the CMake system name to the OS directory name. The label is only
+# used to word the architecture error message below.
+if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+    set(PLATFORM_OS "linux")
+    set(_platform_os_label "Linux")
+elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
+    set(PLATFORM_OS "darwin")
+    set(_platform_os_label "macOS")
+else()
+    message(FATAL_ERROR "Unsupported operating system: ${CMAKE_SYSTEM_NAME}. Only Linux and macOS are supported.")
+endif()
+
+# Step 2: map the processor name to the architecture directory name. The same
+# mapping applies to both supported operating systems.
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64")
+    set(PLATFORM_ARCH "amd64")
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64")
+    set(PLATFORM_ARCH "aarch64")
+else()
+    message(FATAL_ERROR "Unsupported ${_platform_os_label} architecture: ${CMAKE_SYSTEM_PROCESSOR}")
+endif()
+unset(_platform_os_label)
+
+set(PLATFORM_DIR "${PLATFORM_OS}/${PLATFORM_ARCH}")
+message(STATUS "Building for platform: ${PLATFORM_DIR}")
+
+# Build the JNI library
+add_library(paimon_faiss_jni SHARED
+ paimon_faiss_jni.cpp
+)
+
+# Link Faiss: use the "faiss" target when one exists; otherwise fall back to the
+# FAISS_INCLUDE_DIRS / FAISS_LIBRARIES variables set during the Faiss lookup.
+if(TARGET faiss)
+ target_link_libraries(paimon_faiss_jni PRIVATE faiss)
+else()
+ target_include_directories(paimon_faiss_jni PRIVATE ${FAISS_INCLUDE_DIRS})
+ target_link_libraries(paimon_faiss_jni PRIVATE ${FAISS_LIBRARIES})
+endif()
+
+# Link extra libraries for fat lib (BLAS, LAPACK, etc.)
+# FAISS_STATIC_LIBS and FAISS_EXTRA_LIBS are the lists collected by the
+# BLAS/LAPACK lookup; either may be empty.
+if(BUILD_FAT_LIB)
+ # Link static libraries with --whole-archive to embed all symbols
+ # NOTE(review): target_link_options() entries are not positioned relative to
+ # the libraries added by target_link_libraries(), so the two flags below
+ # probably do not bracket the archives on the final link line - confirm with
+ # a verbose build before relying on whole-archive semantics.
+ if(FAISS_STATIC_LIBS AND CMAKE_SYSTEM_NAME STREQUAL "Linux")
+ message(STATUS "Linking static libraries with --whole-archive: ${FAISS_STATIC_LIBS}")
+ target_link_options(paimon_faiss_jni PRIVATE
+ "-Wl,--whole-archive"
+ )
+ target_link_libraries(paimon_faiss_jni PRIVATE ${FAISS_STATIC_LIBS})
+ target_link_options(paimon_faiss_jni PRIVATE
+ "-Wl,--no-whole-archive"
+ )
+ elseif(FAISS_STATIC_LIBS)
+ # macOS doesn't use --whole-archive, use -force_load instead
+ # Each -force_load option carries the archive path itself.
+ foreach(static_lib ${FAISS_STATIC_LIBS})
+ target_link_options(paimon_faiss_jni PRIVATE "-Wl,-force_load,${static_lib}")
+ endforeach()
+ message(STATUS "Linking static libraries with -force_load: ${FAISS_STATIC_LIBS}")
+ endif()
+
+ # Link remaining shared libraries
+ if(FAISS_EXTRA_LIBS)
+ target_link_libraries(paimon_faiss_jni PRIVATE ${FAISS_EXTRA_LIBS})
+ message(STATUS "Linking extra libraries: ${FAISS_EXTRA_LIBS}")
+ endif()
+endif()
+
+# Link OpenMP - always use dynamic linking for OpenMP (static libgomp.a often lacks -fPIC)
+# Preference order: the OpenMP::OpenMP_CXX imported target, then (if only
+# OpenMP_FOUND is set) the raw OpenMP_CXX_FLAGS plus an explicit gomp library on Linux.
+# When neither is available nothing is linked here.
+if(TARGET OpenMP::OpenMP_CXX)
+ target_link_libraries(paimon_faiss_jni PRIVATE OpenMP::OpenMP_CXX)
+ message(STATUS "Linking OpenMP via imported target")
+elseif(OpenMP_FOUND)
+ target_compile_options(paimon_faiss_jni PRIVATE ${OpenMP_CXX_FLAGS})
+ # Link against the shared gomp library
+ if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+ # NOTE(review): the hint paths list the x86_64 multiarch directory only; other
+ # architectures depend on CMake's default search paths or the plain "gomp" fallback.
+ find_library(GOMP_SHARED_LIBRARY NAMES gomp PATHS /usr/lib /usr/lib64 /usr/lib/x86_64-linux-gnu)
+ if(GOMP_SHARED_LIBRARY)
+ target_link_libraries(paimon_faiss_jni PRIVATE ${GOMP_SHARED_LIBRARY})
+ message(STATUS "Linking OpenMP shared library: ${GOMP_SHARED_LIBRARY}")
+ else()
+ target_link_libraries(paimon_faiss_jni PRIVATE gomp)
+ message(STATUS "Linking OpenMP: gomp")
+ endif()
+ else()
+ # The OpenMP compile flags are passed through as link items here.
+ target_link_libraries(paimon_faiss_jni PRIVATE ${OpenMP_CXX_FLAGS})
+ endif()
+endif()
+
+# Platform-specific settings
+# Sets the library suffix and the C++ runtime linkage per operating system.
+if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
+ # macOS specific settings
+ set_target_properties(paimon_faiss_jni PROPERTIES
+ SUFFIX ".dylib"
+ INSTALL_NAME_DIR "@rpath"
+ BUILD_WITH_INSTALL_RPATH TRUE
+ )
+
+ # Link against libc++
+ target_link_libraries(paimon_faiss_jni PRIVATE c++)
+
+ # For fat lib on macOS, embed OpenMP library path
+ # Two rpath entries are added: the library's own directory (@loader_path) and
+ # the Homebrew libomp lib directory detected at configure time.
+ if(BUILD_FAT_LIB AND HOMEBREW_LIBOMP_PREFIX)
+ target_link_options(paimon_faiss_jni PRIVATE
+ "-Wl,-rpath,@loader_path"
+ "-Wl,-rpath,${HOMEBREW_LIBOMP_PREFIX}/lib"
+ )
+ endif()
+
+elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+ # Linux specific settings
+ set_target_properties(paimon_faiss_jni PROPERTIES
+ SUFFIX ".so"
+ )
+
+ if(BUILD_FAT_LIB)
+ # For fat lib, use static libstdc++ and libgcc
+ # --exclude-libs,ALL is also passed so that symbols from the linked static
+ # archives are not exported from the shared library.
+ target_link_options(paimon_faiss_jni PRIVATE
+ "-static-libstdc++"
+ "-static-libgcc"
+ "-Wl,--exclude-libs,ALL"
+ )
+ message(STATUS "Using static libstdc++ and libgcc for fat lib")
+ else()
+ target_link_libraries(paimon_faiss_jni PRIVATE stdc++)
+ endif()
+
+endif()
+
+# Set output directory - output to src/main/resources/{os}/{arch}/
+# The path is resolved relative to the directory of the top-level CMakeLists.txt.
+set(OUTPUT_DIR "${CMAKE_SOURCE_DIR}/../resources/${PLATFORM_DIR}")
+set_target_properties(paimon_faiss_jni PROPERTIES
+ LIBRARY_OUTPUT_DIRECTORY ${OUTPUT_DIR}
+ RUNTIME_OUTPUT_DIRECTORY ${OUTPUT_DIR}
+)
+
+# Optimization level
+# Only the exact values "avx2" and "avx512" add compiler flags; every other
+# value of FAISS_OPT_LEVEL falls through to the generic branch.
+if(FAISS_OPT_LEVEL STREQUAL "avx2")
+ target_compile_options(paimon_faiss_jni PRIVATE -mavx2 -mfma)
+ message(STATUS "Building with AVX2 optimizations")
+elseif(FAISS_OPT_LEVEL STREQUAL "avx512")
+ target_compile_options(paimon_faiss_jni PRIVATE -mavx512f -mavx512dq -mavx512bw -mavx512vl)
+ message(STATUS "Building with AVX-512 optimizations")
+else()
+ message(STATUS "Building with generic optimizations")
+endif()
+
+# Copy bundled shared libraries to output directory and set rpath
+# Runs only when the BLAS lookup fell back to a shared OpenBLAS
+# (BUNDLE_OPENBLAS / BUNDLE_OPENBLAS_PATH).
+if(BUILD_FAT_LIB AND BUNDLE_OPENBLAS AND BUNDLE_OPENBLAS_PATH)
+ message(STATUS "Will bundle OpenBLAS shared library: ${BUNDLE_OPENBLAS_PATH}")
+
+ # Get the actual library file (resolve symlinks)
+ get_filename_component(OPENBLAS_REALPATH ${BUNDLE_OPENBLAS_PATH} REALPATH)
+ # NOTE(review): OPENBLAS_FILENAME is computed but not used in this section.
+ get_filename_component(OPENBLAS_FILENAME ${OPENBLAS_REALPATH} NAME)
+
+ # Copy OpenBLAS to output directory after build
+ # NOTE(review): the destination name is fixed to libopenblas.so.0 on every
+ # platform, while the rpath handling below is Linux-only - confirm the intended
+ # macOS behaviour.
+ add_custom_command(TARGET paimon_faiss_jni POST_BUILD
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different
+ ${OPENBLAS_REALPATH}
+ ${OUTPUT_DIR}/libopenblas.so.0
+ COMMENT "Bundling OpenBLAS shared library"
+ )
+
+ # Set rpath to look in the same directory as the library
+ if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+ target_link_options(paimon_faiss_jni PRIVATE
+ "-Wl,-rpath,$ORIGIN"
+ )
+ # Also patch the library after build to use the bundled libopenblas
+ # "|| true" keeps the build going when the patchelf command fails.
+ add_custom_command(TARGET paimon_faiss_jni POST_BUILD
+ COMMAND patchelf --set-rpath "$$ORIGIN" ${OUTPUT_DIR}/libpaimon_faiss_jni.so || true
+ COMMENT "Setting rpath to $ORIGIN"
+ )
+ endif()
+endif()
+
+# Install target
+# The library is installed under the same {os}/{arch} sub-directory
+# (PLATFORM_DIR) that is used for the build output.
+install(TARGETS paimon_faiss_jni
+ LIBRARY DESTINATION ${PLATFORM_DIR}
+ RUNTIME DESTINATION ${PLATFORM_DIR}
+)
+
diff --git a/paimon-faiss-jni/src/main/native/paimon_faiss_jni.cpp b/paimon-faiss-jni/src/main/native/paimon_faiss_jni.cpp
new file mode 100644
index 000000000000..38dcff06fc15
--- /dev/null
+++ b/paimon-faiss-jni/src/main/native/paimon_faiss_jni.cpp
@@ -0,0 +1,464 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "paimon_faiss_jni.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef _OPENMP
+#include
+#endif
+
+#include
+#include
+#include
+#include
+#include
+
+// Helper macro for exception handling
+// FAISS_TRY opens a try block and FAISS_CATCH(env) closes it. Any C++ exception
+// that escapes the enclosed code is turned into a pending Java
+// org.apache.paimon.faiss.FaissException: std::exception keeps its what() text,
+// anything else gets the message "Unknown native exception". If the exception
+// class cannot be found, nothing is thrown by the macro itself.
+// Control then continues after FAISS_CATCH, so functions with a return value
+// place a fallback `return` right after it.
+#define FAISS_TRY try {
+#define FAISS_CATCH(env) \
+ } catch (const std::exception& e) { \
+ jclass exceptionClass = env->FindClass("org/apache/paimon/faiss/FaissException"); \
+ if (exceptionClass != nullptr) { \
+ env->ThrowNew(exceptionClass, e.what()); \
+ } \
+ } catch (...) { \
+ jclass exceptionClass = env->FindClass("org/apache/paimon/faiss/FaissException"); \
+ if (exceptionClass != nullptr) { \
+ env->ThrowNew(exceptionClass, "Unknown native exception"); \
+ } \
+ }
+
+// Helper function to convert jstring to std::string.
+//
+// Returns an empty string for a null jstring. If the JVM cannot provide the UTF
+// characters, GetStringUTFChars returns nullptr with an exception pending. That
+// exception is cleared and a std::runtime_error is thrown instead, so callers
+// inside FAISS_TRY/FAISS_CATCH report a regular FaissException.
+static std::string jstringToString(JNIEnv* env, jstring jstr) {
+    if (jstr == nullptr) {
+        return "";
+    }
+    const char* chars = env->GetStringUTFChars(jstr, nullptr);
+    if (chars == nullptr) {
+        // Building a std::string from a null pointer is undefined behaviour.
+        env->ExceptionClear();
+        throw std::runtime_error("Failed to read Java string");
+    }
+    std::string result;
+    try {
+        result.assign(chars);
+    } catch (...) {
+        // Do not leak the JVM-owned buffer when the copy fails.
+        env->ReleaseStringUTFChars(jstr, chars);
+        throw;
+    }
+    env->ReleaseStringUTFChars(jstr, chars);
+    return result;
+}
+
+// Helper function to get index pointer from handle
+// The handle is the integer form of the pointer returned by the create/read/
+// deserialize functions; no validation is performed here.
+static faiss::Index* getIndex(jlong handle) {
+ return reinterpret_cast(handle);
+}
+
+// Helper to get IVF index
+// Accepts either an IVF index directly or an IndexIDMap whose wrapped index is
+// an IVF index. Throws std::runtime_error for anything else.
+static faiss::IndexIVF* getIndexIVF(jlong handle) {
+ faiss::Index* index = getIndex(handle);
+
+ // Try direct cast
+ faiss::IndexIVF* ivf = dynamic_cast(index);
+ if (ivf != nullptr) {
+ return ivf;
+ }
+
+ // Try through IDMap wrapper
+ faiss::IndexIDMap* idmap = dynamic_cast(index);
+ if (idmap != nullptr) {
+ ivf = dynamic_cast(idmap->index);
+ if (ivf != nullptr) {
+ return ivf;
+ }
+ }
+
+ throw std::runtime_error("Index is not an IVF index");
+}
+
+// Helper to get HNSW index
+// Accepts either an HNSW index directly or an IndexIDMap whose wrapped index is
+// an HNSW index. Throws std::runtime_error for anything else.
+static faiss::IndexHNSW* getIndexHNSW(jlong handle) {
+ faiss::Index* index = getIndex(handle);
+
+ // Try direct cast
+ faiss::IndexHNSW* hnsw = dynamic_cast(index);
+ if (hnsw != nullptr) {
+ return hnsw;
+ }
+
+ // Try through IDMap wrapper
+ faiss::IndexIDMap* idmap = dynamic_cast(index);
+ if (idmap != nullptr) {
+ hnsw = dynamic_cast(idmap->index);
+ if (hnsw != nullptr) {
+ return hnsw;
+ }
+ }
+
+ throw std::runtime_error("Index is not an HNSW index");
+}
+
+// Range search result wrapper
+// Keeps the number of queries (nq) next to the Faiss result so the accessor
+// functions can size the limits array (nq + 1 entries) and read the total
+// result count from lims[nq].
+struct RangeSearchResultWrapper {
+ faiss::RangeSearchResult result;
+ int nq;
+
+ RangeSearchResultWrapper(int nq_) : result(nq_), nq(nq_) {}
+};
+
+// ==================== Index Factory ====================
+
+// Creates an index through faiss::index_factory and returns its pointer as a handle.
+// metricType 0 selects METRIC_L2; every other value selects METRIC_INNER_PRODUCT.
+// Returns 0 after an exception has been translated by FAISS_CATCH.
+JNIEXPORT jlong JNICALL Java_org_apache_paimon_faiss_FaissNative_indexFactoryCreate
+ (JNIEnv* env, jclass, jint dimension, jstring description, jint metricType) {
+ FAISS_TRY
+ std::string desc = jstringToString(env, description);
+ faiss::MetricType metric = (metricType == 0) ? faiss::METRIC_L2 : faiss::METRIC_INNER_PRODUCT;
+ faiss::Index* index = faiss::index_factory(dimension, desc.c_str(), metric);
+ return reinterpret_cast(index);
+ FAISS_CATCH(env)
+ return 0;
+}
+
+// ==================== Index Operations ====================
+
+// Deletes the index behind the handle. The handle must not be used afterwards.
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexDestroy
+ (JNIEnv* env, jclass, jlong handle) {
+ FAISS_TRY
+ delete getIndex(handle);
+ FAISS_CATCH(env)
+}
+
+// Returns the index's `d` field (vector dimension); 0 if an exception was translated.
+JNIEXPORT jint JNICALL Java_org_apache_paimon_faiss_FaissNative_indexGetDimension
+ (JNIEnv* env, jclass, jlong handle) {
+ FAISS_TRY
+ return static_cast(getIndex(handle)->d);
+ FAISS_CATCH(env)
+ return 0;
+}
+
+// Returns the index's `ntotal` field (number of stored vectors); 0 if an exception
+// was translated.
+JNIEXPORT jlong JNICALL Java_org_apache_paimon_faiss_FaissNative_indexGetCount
+ (JNIEnv* env, jclass, jlong handle) {
+ FAISS_TRY
+ return static_cast(getIndex(handle)->ntotal);
+ FAISS_CATCH(env)
+ return 0;
+}
+
+// Returns the index's `is_trained` flag as a JNI boolean; JNI_FALSE if an
+// exception was translated.
+JNIEXPORT jboolean JNICALL Java_org_apache_paimon_faiss_FaissNative_indexIsTrained
+ (JNIEnv* env, jclass, jlong handle) {
+ FAISS_TRY
+ return getIndex(handle)->is_trained ? JNI_TRUE : JNI_FALSE;
+ FAISS_CATCH(env)
+ return JNI_FALSE;
+}
+
+// Returns 0 when the index metric is METRIC_L2 and 1 for every other metric.
+// This mirrors the encoding accepted by indexFactoryCreate. The fallback 0 after a
+// translated exception is indistinguishable from L2.
+JNIEXPORT jint JNICALL Java_org_apache_paimon_faiss_FaissNative_indexGetMetricType
+ (JNIEnv* env, jclass, jlong handle) {
+ FAISS_TRY
+ faiss::MetricType metric = getIndex(handle)->metric_type;
+ return (metric == faiss::METRIC_L2) ? 0 : 1;
+ FAISS_CATCH(env)
+ return 0;
+}
+
+// Trains the index on `n` vectors taken from the flat `vectors` array.
+//
+// The array elements are always released, including when Faiss throws (the
+// exception is then reported to Java by FAISS_CATCH). If the JVM cannot provide
+// the elements, the function returns with the JVM's own exception pending.
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexTrain
+  (JNIEnv* env, jclass, jlong handle, jlong n, jfloatArray vectors) {
+    FAISS_TRY
+        jfloat* vectorData = env->GetFloatArrayElements(vectors, nullptr);
+        if (vectorData == nullptr) {
+            return;
+        }
+        try {
+            getIndex(handle)->train(n, vectorData);
+        } catch (...) {
+            // Release before propagating so the pinned/copied buffer is not leaked.
+            env->ReleaseFloatArrayElements(vectors, vectorData, JNI_ABORT);
+            throw;
+        }
+        // JNI_ABORT: the input is read-only, nothing needs to be copied back.
+        env->ReleaseFloatArrayElements(vectors, vectorData, JNI_ABORT);
+    FAISS_CATCH(env)
+}
+
+// Adds `n` vectors taken from the flat `vectors` array to the index.
+//
+// The array elements are always released, including when Faiss throws (the
+// exception is then reported to Java by FAISS_CATCH). If the JVM cannot provide
+// the elements, the function returns with the JVM's own exception pending.
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexAdd
+  (JNIEnv* env, jclass, jlong handle, jlong n, jfloatArray vectors) {
+    FAISS_TRY
+        jfloat* vectorData = env->GetFloatArrayElements(vectors, nullptr);
+        if (vectorData == nullptr) {
+            return;
+        }
+        try {
+            getIndex(handle)->add(n, vectorData);
+        } catch (...) {
+            // Release before propagating so the pinned/copied buffer is not leaked.
+            env->ReleaseFloatArrayElements(vectors, vectorData, JNI_ABORT);
+            throw;
+        }
+        // JNI_ABORT: the input is read-only, nothing needs to be copied back.
+        env->ReleaseFloatArrayElements(vectors, vectorData, JNI_ABORT);
+    FAISS_CATCH(env)
+}
+
+// Adds `n` vectors from `vectors` under the caller-supplied labels in `ids`.
+//
+// Both Java arrays are always released, including when the id copy or Faiss
+// throws. If the JVM cannot provide the elements of either array, the function
+// returns with the JVM's own exception pending.
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexAddWithIds
+  (JNIEnv* env, jclass, jlong handle, jlong n, jfloatArray vectors, jlongArray ids) {
+    FAISS_TRY
+        jfloat* vectorData = env->GetFloatArrayElements(vectors, nullptr);
+        if (vectorData == nullptr) {
+            return;
+        }
+        jlong* idData = env->GetLongArrayElements(ids, nullptr);
+        if (idData == nullptr) {
+            env->ReleaseFloatArrayElements(vectors, vectorData, JNI_ABORT);
+            return;
+        }
+
+        try {
+            // Convert jlong to faiss::idx_t if needed
+            std::vector<faiss::idx_t> faissIds(n);
+            for (jlong i = 0; i < n; i++) {
+                faissIds[i] = static_cast<faiss::idx_t>(idData[i]);
+            }
+
+            getIndex(handle)->add_with_ids(n, vectorData, faissIds.data());
+        } catch (...) {
+            // Release before propagating so neither buffer is leaked.
+            env->ReleaseFloatArrayElements(vectors, vectorData, JNI_ABORT);
+            env->ReleaseLongArrayElements(ids, idData, JNI_ABORT);
+            throw;
+        }
+
+        // JNI_ABORT: both inputs are read-only, nothing needs to be copied back.
+        env->ReleaseFloatArrayElements(vectors, vectorData, JNI_ABORT);
+        env->ReleaseLongArrayElements(ids, idData, JNI_ABORT);
+    FAISS_CATCH(env)
+}
+
+// Runs a k-nearest-neighbour search for `n` queries and writes the results into
+// the caller-provided `distances` and `labels` arrays (n * k entries each).
+//
+// All three Java arrays are always released. On success the output arrays are
+// committed (mode 0). On failure they are released with JNI_ABORT and the
+// exception is reported to Java by FAISS_CATCH. If the JVM cannot provide the
+// elements of an array, the function returns with the JVM's own exception pending.
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexSearch
+  (JNIEnv* env, jclass, jlong handle, jlong n, jfloatArray queries, jint k,
+   jfloatArray distances, jlongArray labels) {
+    FAISS_TRY
+        jfloat* queryData = env->GetFloatArrayElements(queries, nullptr);
+        if (queryData == nullptr) {
+            return;
+        }
+        jfloat* distData = env->GetFloatArrayElements(distances, nullptr);
+        if (distData == nullptr) {
+            env->ReleaseFloatArrayElements(queries, queryData, JNI_ABORT);
+            return;
+        }
+        jlong* labelData = env->GetLongArrayElements(labels, nullptr);
+        if (labelData == nullptr) {
+            env->ReleaseFloatArrayElements(distances, distData, JNI_ABORT);
+            env->ReleaseFloatArrayElements(queries, queryData, JNI_ABORT);
+            return;
+        }
+
+        try {
+            // Use temporary vectors for faiss
+            std::vector<faiss::idx_t> faissLabels(n * k);
+
+            getIndex(handle)->search(n, queryData, k, distData, faissLabels.data());
+
+            // Copy labels back
+            for (jlong i = 0; i < n * k; i++) {
+                labelData[i] = static_cast<jlong>(faissLabels[i]);
+            }
+        } catch (...) {
+            // Release before propagating; the outputs are not committed on failure.
+            env->ReleaseFloatArrayElements(queries, queryData, JNI_ABORT);
+            env->ReleaseFloatArrayElements(distances, distData, JNI_ABORT);
+            env->ReleaseLongArrayElements(labels, labelData, JNI_ABORT);
+            throw;
+        }
+
+        env->ReleaseFloatArrayElements(queries, queryData, JNI_ABORT);
+        // Mode 0 copies the results back to the Java arrays (if they were copies).
+        env->ReleaseFloatArrayElements(distances, distData, 0);
+        env->ReleaseLongArrayElements(labels, labelData, 0);
+    FAISS_CATCH(env)
+}
+
+// Runs a range search for `n` queries and returns a handle to a heap-allocated
+// RangeSearchResultWrapper. The caller owns the handle and must free it with
+// rangeSearchResultDestroy.
+//
+// Returns 0 when an exception was reported to Java. In that case neither the
+// wrapper nor the query buffer is leaked.
+JNIEXPORT jlong JNICALL Java_org_apache_paimon_faiss_FaissNative_indexRangeSearch
+  (JNIEnv* env, jclass, jlong handle, jlong n, jfloatArray queries, jfloat radius) {
+    FAISS_TRY
+        jfloat* queryData = env->GetFloatArrayElements(queries, nullptr);
+        if (queryData == nullptr) {
+            // The JVM has already raised its own exception.
+            return 0;
+        }
+
+        RangeSearchResultWrapper* wrapper = nullptr;
+        try {
+            wrapper = new RangeSearchResultWrapper(static_cast<int>(n));
+            getIndex(handle)->range_search(n, queryData, radius, &wrapper->result);
+        } catch (...) {
+            // Free everything acquired so far before propagating.
+            delete wrapper;
+            env->ReleaseFloatArrayElements(queries, queryData, JNI_ABORT);
+            throw;
+        }
+
+        env->ReleaseFloatArrayElements(queries, queryData, JNI_ABORT);
+        return reinterpret_cast<jlong>(wrapper);
+    FAISS_CATCH(env)
+    return 0;
+}
+
+// Removes the vectors whose labels are listed in `ids` and returns how many were
+// removed. Returns 0 when an exception was reported to Java.
+//
+// The ids are copied into a native vector and the Java array is released right
+// away, so it cannot leak if the removal itself throws.
+JNIEXPORT jlong JNICALL Java_org_apache_paimon_faiss_FaissNative_indexRemoveIds
+  (JNIEnv* env, jclass, jlong handle, jlongArray ids) {
+    FAISS_TRY
+        jsize n = env->GetArrayLength(ids);
+        jlong* idData = env->GetLongArrayElements(ids, nullptr);
+        if (idData == nullptr) {
+            // The JVM has already raised its own exception.
+            return 0;
+        }
+
+        std::vector<faiss::idx_t> faissIds;
+        try {
+            faissIds.assign(idData, idData + n);
+        } catch (...) {
+            env->ReleaseLongArrayElements(ids, idData, JNI_ABORT);
+            throw;
+        }
+        env->ReleaseLongArrayElements(ids, idData, JNI_ABORT);
+
+        // Create ID selector
+        faiss::IDSelectorArray selector(n, faissIds.data());
+
+        return static_cast<jlong>(getIndex(handle)->remove_ids(selector));
+    FAISS_CATCH(env)
+    return 0;
+}
+
+// Calls reset() on the index behind the handle. The handle itself stays valid.
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexReset
+ (JNIEnv* env, jclass, jlong handle) {
+ FAISS_TRY
+ getIndex(handle)->reset();
+ FAISS_CATCH(env)
+}
+
+// ==================== Index I/O ====================
+
+// Writes the index to the file at `path` using faiss::write_index.
+// Failures surface in Java as FaissException via FAISS_CATCH.
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexWriteToFile
+ (JNIEnv* env, jclass, jlong handle, jstring path) {
+ FAISS_TRY
+ std::string filePath = jstringToString(env, path);
+ faiss::write_index(getIndex(handle), filePath.c_str());
+ FAISS_CATCH(env)
+}
+
+// Reads an index from the file at `path` using faiss::read_index and returns its
+// pointer as a handle. Returns 0 after an exception has been translated.
+JNIEXPORT jlong JNICALL Java_org_apache_paimon_faiss_FaissNative_indexReadFromFile
+ (JNIEnv* env, jclass, jstring path) {
+ FAISS_TRY
+ std::string filePath = jstringToString(env, path);
+ faiss::Index* index = faiss::read_index(filePath.c_str());
+ return reinterpret_cast(index);
+ FAISS_CATCH(env)
+ return 0;
+}
+
+// Serializes the index into a new Java byte array.
+//
+// Returns nullptr when an exception is pending: either a FaissException (Faiss
+// failure, or a serialized form larger than a Java array can hold) or the JVM's
+// own exception when the array cannot be allocated.
+JNIEXPORT jbyteArray JNICALL Java_org_apache_paimon_faiss_FaissNative_indexSerialize
+  (JNIEnv* env, jclass, jlong handle) {
+    FAISS_TRY
+        faiss::VectorIOWriter writer;
+        faiss::write_index(getIndex(handle), &writer);
+
+        // Java arrays are indexed with 32-bit signed ints. Fail loudly instead of
+        // silently truncating the length in the cast below.
+        if (writer.data.size() > static_cast<size_t>(0x7fffffff)) {
+            throw std::runtime_error("Serialized index is too large for a Java byte array");
+        }
+        const jsize length = static_cast<jsize>(writer.data.size());
+
+        jbyteArray result = env->NewByteArray(length);
+        if (result == nullptr) {
+            // Allocation failed; the JVM has already raised its own exception.
+            return nullptr;
+        }
+        env->SetByteArrayRegion(result, 0, length,
+                                reinterpret_cast<const jbyte*>(writer.data.data()));
+        return result;
+    FAISS_CATCH(env)
+    return nullptr;
+}
+
+// Rebuilds an index from the bytes produced by indexSerialize and returns its
+// pointer as a handle. Returns 0 when an exception was reported to Java.
+//
+// The bytes are copied straight into the reader's buffer with
+// GetByteArrayRegion, so no JVM array elements are held while Faiss parses the
+// data and nothing can leak if parsing throws.
+JNIEXPORT jlong JNICALL Java_org_apache_paimon_faiss_FaissNative_indexDeserialize
+  (JNIEnv* env, jclass, jbyteArray data) {
+    FAISS_TRY
+        jsize length = env->GetArrayLength(data);
+
+        faiss::VectorIOReader reader;
+        reader.data.resize(length);
+        if (length > 0) {
+            env->GetByteArrayRegion(data, 0, length,
+                                    reinterpret_cast<jbyte*>(reader.data.data()));
+        }
+
+        faiss::Index* index = faiss::read_index(&reader);
+        return reinterpret_cast<jlong>(index);
+    FAISS_CATCH(env)
+    return 0;
+}
+
+// ==================== Range Search Result ====================
+
+// Frees a range search result handle returned by indexRangeSearch.
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_rangeSearchResultDestroy
+ (JNIEnv* env, jclass, jlong handle) {
+ FAISS_TRY
+ delete reinterpret_cast(handle);
+ FAISS_CATCH(env)
+}
+
+// Returns a copy of the result's `lims` array as a Java long[] with nq + 1 entries.
+// Returns nullptr after an exception has been translated by FAISS_CATCH.
+// NOTE(review): the NewLongArray result is not checked for nullptr before use.
+JNIEXPORT jlongArray JNICALL Java_org_apache_paimon_faiss_FaissNative_rangeSearchResultGetLimits
+ (JNIEnv* env, jclass, jlong handle) {
+ FAISS_TRY
+ RangeSearchResultWrapper* wrapper = reinterpret_cast(handle);
+ jsize n = wrapper->nq + 1;
+ jlongArray result = env->NewLongArray(n);
+
+ // Copy element by element into a temporary buffer, casting each value.
+ std::vector limits(n);
+ for (jsize i = 0; i < n; i++) {
+ limits[i] = static_cast(wrapper->result.lims[i]);
+ }
+ env->SetLongArrayRegion(result, 0, n, limits.data());
+ return result;
+ FAISS_CATCH(env)
+ return nullptr;
+}
+
+// Returns a copy of all result labels as a Java long[]. The element count is
+// taken from lims[nq]. Returns nullptr after an exception has been translated.
+// NOTE(review): the NewLongArray result is not checked for nullptr before use.
+JNIEXPORT jlongArray JNICALL Java_org_apache_paimon_faiss_FaissNative_rangeSearchResultGetLabels
+ (JNIEnv* env, jclass, jlong handle) {
+ FAISS_TRY
+ RangeSearchResultWrapper* wrapper = reinterpret_cast(handle);
+ jsize n = static_cast(wrapper->result.lims[wrapper->nq]);
+ jlongArray result = env->NewLongArray(n);
+
+ // Copy element by element into a temporary buffer, casting each value.
+ std::vector labels(n);
+ for (jsize i = 0; i < n; i++) {
+ labels[i] = static_cast(wrapper->result.labels[i]);
+ }
+ env->SetLongArrayRegion(result, 0, n, labels.data());
+ return result;
+ FAISS_CATCH(env)
+ return nullptr;
+}
+
+// Returns a copy of all result distances as a Java float[]. The element count is
+// taken from lims[nq]. Returns nullptr after an exception has been translated.
+// NOTE(review): the NewFloatArray result is not checked for nullptr before use.
+JNIEXPORT jfloatArray JNICALL Java_org_apache_paimon_faiss_FaissNative_rangeSearchResultGetDistances
+ (JNIEnv* env, jclass, jlong handle) {
+ FAISS_TRY
+ RangeSearchResultWrapper* wrapper = reinterpret_cast(handle);
+ jsize n = static_cast(wrapper->result.lims[wrapper->nq]);
+ jfloatArray result = env->NewFloatArray(n);
+ // The distances are copied directly from the result buffer, without a temporary.
+ env->SetFloatArrayRegion(result, 0, n, wrapper->result.distances);
+ return result;
+ FAISS_CATCH(env)
+ return nullptr;
+}
+
+// ==================== IVF Index Specific ====================
+
+// Returns the `nprobe` field of the IVF index behind the handle. A non-IVF index
+// raises FaissException (see getIndexIVF) and yields the fallback 0.
+JNIEXPORT jint JNICALL Java_org_apache_paimon_faiss_FaissNative_ivfGetNprobe
+ (JNIEnv* env, jclass, jlong handle) {
+ FAISS_TRY
+ return static_cast(getIndexIVF(handle)->nprobe);
+ FAISS_CATCH(env)
+ return 0;
+}
+
+// Sets the `nprobe` field of the IVF index behind the handle. The value is stored
+// as given; no range check is performed here. A non-IVF index raises FaissException.
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_ivfSetNprobe
+ (JNIEnv* env, jclass, jlong handle, jint nprobe) {
+ FAISS_TRY
+ getIndexIVF(handle)->nprobe = static_cast(nprobe);
+ FAISS_CATCH(env)
+}
+
+// Returns the `nlist` field of the IVF index behind the handle. A non-IVF index
+// raises FaissException (see getIndexIVF) and yields the fallback 0.
+JNIEXPORT jint JNICALL Java_org_apache_paimon_faiss_FaissNative_ivfGetNlist
+ (JNIEnv* env, jclass, jlong handle) {
+ FAISS_TRY
+ return static_cast(getIndexIVF(handle)->nlist);
+ FAISS_CATCH(env)
+ return 0;
+}
+
+// ==================== HNSW Index Specific ====================
+
+// Returns `hnsw.efSearch` of the HNSW index behind the handle. A non-HNSW index
+// raises FaissException (see getIndexHNSW) and yields the fallback 0.
+JNIEXPORT jint JNICALL Java_org_apache_paimon_faiss_FaissNative_hnswGetEfSearch
+ (JNIEnv* env, jclass, jlong handle) {
+ FAISS_TRY
+ return static_cast(getIndexHNSW(handle)->hnsw.efSearch);
+ FAISS_CATCH(env)
+ return 0;
+}
+
+// Sets `hnsw.efSearch` of the HNSW index behind the handle. The value is stored as
+// given; no range check is performed here. A non-HNSW index raises FaissException.
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_hnswSetEfSearch
+ (JNIEnv* env, jclass, jlong handle, jint efSearch) {
+ FAISS_TRY
+ getIndexHNSW(handle)->hnsw.efSearch = static_cast(efSearch);
+ FAISS_CATCH(env)
+}
+
+// Returns `hnsw.efConstruction` of the HNSW index behind the handle. A non-HNSW
+// index raises FaissException (see getIndexHNSW) and yields the fallback 0.
+JNIEXPORT jint JNICALL Java_org_apache_paimon_faiss_FaissNative_hnswGetEfConstruction
+ (JNIEnv* env, jclass, jlong handle) {
+ FAISS_TRY
+ return static_cast(getIndexHNSW(handle)->hnsw.efConstruction);
+ FAISS_CATCH(env)
+ return 0;
+}
+
+// ==================== Utility ====================
+
+// Returns a version string to Java.
+JNIEXPORT jstring JNICALL Java_org_apache_paimon_faiss_FaissNative_getVersion
+ (JNIEnv* env, jclass) {
+ // The value is a hard-coded literal; it is not queried from the linked Faiss
+ // library, so it must be kept in sync by hand.
+ return env->NewStringUTF("1.7.4");
+}
+
+// Forwards numThreads to omp_set_num_threads when the library was compiled with
+// OpenMP (_OPENMP defined); otherwise the call is a no-op.
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_setNumThreads
+ (JNIEnv* env, jclass, jint numThreads) {
+#ifdef _OPENMP
+ omp_set_num_threads(numThreads);
+#else
+ // OpenMP not available, ignore
+ (void)numThreads;
+#endif
+}
+
+// Returns omp_get_max_threads() when compiled with OpenMP (_OPENMP defined), and
+// 1 otherwise.
+JNIEXPORT jint JNICALL Java_org_apache_paimon_faiss_FaissNative_getNumThreads
+ (JNIEnv* env, jclass) {
+#ifdef _OPENMP
+ return omp_get_max_threads();
+#else
+ return 1;
+#endif
+}
+
diff --git a/paimon-faiss-jni/src/main/native/paimon_faiss_jni.h b/paimon-faiss-jni/src/main/native/paimon_faiss_jni.h
new file mode 100644
index 000000000000..27086f91ceab
--- /dev/null
+++ b/paimon-faiss-jni/src/main/native/paimon_faiss_jni.h
@@ -0,0 +1,273 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef PAIMON_FAISS_JNI_H
+#define PAIMON_FAISS_JNI_H
+
+#include <jni.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexFactoryCreate
+ * Signature: (ILjava/lang/String;I)J
+ */
+JNIEXPORT jlong JNICALL Java_org_apache_paimon_faiss_FaissNative_indexFactoryCreate
+ (JNIEnv *, jclass, jint, jstring, jint);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexDestroy
+ * Signature: (J)V
+ */
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexDestroy
+ (JNIEnv *, jclass, jlong);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexGetDimension
+ * Signature: (J)I
+ */
+JNIEXPORT jint JNICALL Java_org_apache_paimon_faiss_FaissNative_indexGetDimension
+ (JNIEnv *, jclass, jlong);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexGetCount
+ * Signature: (J)J
+ */
+JNIEXPORT jlong JNICALL Java_org_apache_paimon_faiss_FaissNative_indexGetCount
+ (JNIEnv *, jclass, jlong);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexIsTrained
+ * Signature: (J)Z
+ */
+JNIEXPORT jboolean JNICALL Java_org_apache_paimon_faiss_FaissNative_indexIsTrained
+ (JNIEnv *, jclass, jlong);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexGetMetricType
+ * Signature: (J)I
+ */
+JNIEXPORT jint JNICALL Java_org_apache_paimon_faiss_FaissNative_indexGetMetricType
+ (JNIEnv *, jclass, jlong);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexTrain
+ * Signature: (JJ[F)V
+ */
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexTrain
+ (JNIEnv *, jclass, jlong, jlong, jfloatArray);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexAdd
+ * Signature: (JJ[F)V
+ */
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexAdd
+ (JNIEnv *, jclass, jlong, jlong, jfloatArray);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexAddWithIds
+ * Signature: (JJ[F[J)V
+ */
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexAddWithIds
+ (JNIEnv *, jclass, jlong, jlong, jfloatArray, jlongArray);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexSearch
+ * Signature: (JJ[FI[F[J)V
+ */
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexSearch
+ (JNIEnv *, jclass, jlong, jlong, jfloatArray, jint, jfloatArray, jlongArray);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexRangeSearch
+ * Signature: (JJ[FF)J
+ */
+JNIEXPORT jlong JNICALL Java_org_apache_paimon_faiss_FaissNative_indexRangeSearch
+ (JNIEnv *, jclass, jlong, jlong, jfloatArray, jfloat);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexRemoveIds
+ * Signature: (J[J)J
+ */
+JNIEXPORT jlong JNICALL Java_org_apache_paimon_faiss_FaissNative_indexRemoveIds
+ (JNIEnv *, jclass, jlong, jlongArray);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexReset
+ * Signature: (J)V
+ */
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexReset
+ (JNIEnv *, jclass, jlong);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexWriteToFile
+ * Signature: (JLjava/lang/String;)V
+ */
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexWriteToFile
+ (JNIEnv *, jclass, jlong, jstring);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexReadFromFile
+ * Signature: (Ljava/lang/String;)J
+ */
+JNIEXPORT jlong JNICALL Java_org_apache_paimon_faiss_FaissNative_indexReadFromFile
+ (JNIEnv *, jclass, jstring);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexSerialize
+ * Signature: (J)[B
+ */
+JNIEXPORT jbyteArray JNICALL Java_org_apache_paimon_faiss_FaissNative_indexSerialize
+ (JNIEnv *, jclass, jlong);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexDeserialize
+ * Signature: ([B)J
+ */
+JNIEXPORT jlong JNICALL Java_org_apache_paimon_faiss_FaissNative_indexDeserialize
+ (JNIEnv *, jclass, jbyteArray);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: rangeSearchResultDestroy
+ * Signature: (J)V
+ */
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_rangeSearchResultDestroy
+ (JNIEnv *, jclass, jlong);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: rangeSearchResultGetLimits
+ * Signature: (J)[J
+ */
+JNIEXPORT jlongArray JNICALL Java_org_apache_paimon_faiss_FaissNative_rangeSearchResultGetLimits
+ (JNIEnv *, jclass, jlong);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: rangeSearchResultGetLabels
+ * Signature: (J)[J
+ */
+JNIEXPORT jlongArray JNICALL Java_org_apache_paimon_faiss_FaissNative_rangeSearchResultGetLabels
+ (JNIEnv *, jclass, jlong);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: rangeSearchResultGetDistances
+ * Signature: (J)[F
+ */
+JNIEXPORT jfloatArray JNICALL Java_org_apache_paimon_faiss_FaissNative_rangeSearchResultGetDistances
+ (JNIEnv *, jclass, jlong);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: ivfGetNprobe
+ * Signature: (J)I
+ */
+JNIEXPORT jint JNICALL Java_org_apache_paimon_faiss_FaissNative_ivfGetNprobe
+ (JNIEnv *, jclass, jlong);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: ivfSetNprobe
+ * Signature: (JI)V
+ */
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_ivfSetNprobe
+ (JNIEnv *, jclass, jlong, jint);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: ivfGetNlist
+ * Signature: (J)I
+ */
+JNIEXPORT jint JNICALL Java_org_apache_paimon_faiss_FaissNative_ivfGetNlist
+ (JNIEnv *, jclass, jlong);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: hnswGetEfSearch
+ * Signature: (J)I
+ */
+JNIEXPORT jint JNICALL Java_org_apache_paimon_faiss_FaissNative_hnswGetEfSearch
+ (JNIEnv *, jclass, jlong);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: hnswSetEfSearch
+ * Signature: (JI)V
+ */
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_hnswSetEfSearch
+ (JNIEnv *, jclass, jlong, jint);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: hnswGetEfConstruction
+ * Signature: (J)I
+ */
+JNIEXPORT jint JNICALL Java_org_apache_paimon_faiss_FaissNative_hnswGetEfConstruction
+ (JNIEnv *, jclass, jlong);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: getVersion
+ * Signature: ()Ljava/lang/String;
+ */
+JNIEXPORT jstring JNICALL Java_org_apache_paimon_faiss_FaissNative_getVersion
+ (JNIEnv *, jclass);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: setNumThreads
+ * Signature: (I)V
+ */
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_setNumThreads
+ (JNIEnv *, jclass, jint);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: getNumThreads
+ * Signature: ()I
+ */
+JNIEXPORT jint JNICALL Java_org_apache_paimon_faiss_FaissNative_getNumThreads
+ (JNIEnv *, jclass);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* PAIMON_FAISS_JNI_H */
+
diff --git a/paimon-faiss-jni/src/main/resources/darwin/aarch64/libpaimon_faiss_jni.dylib b/paimon-faiss-jni/src/main/resources/darwin/aarch64/libpaimon_faiss_jni.dylib
new file mode 100755
index 000000000000..f6958ee5ab83
Binary files /dev/null and b/paimon-faiss-jni/src/main/resources/darwin/aarch64/libpaimon_faiss_jni.dylib differ
diff --git a/paimon-faiss-jni/src/main/resources/darwin/amd64/.gitkeep b/paimon-faiss-jni/src/main/resources/darwin/amd64/.gitkeep
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/paimon-faiss-jni/src/main/resources/linux/aarch64/.gitkeep b/paimon-faiss-jni/src/main/resources/linux/aarch64/.gitkeep
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/paimon-faiss-jni/src/main/resources/linux/amd64/libgcc_s.so.1 b/paimon-faiss-jni/src/main/resources/linux/amd64/libgcc_s.so.1
new file mode 100755
index 000000000000..6b880fa78712
Binary files /dev/null and b/paimon-faiss-jni/src/main/resources/linux/amd64/libgcc_s.so.1 differ
diff --git a/paimon-faiss-jni/src/main/resources/linux/amd64/libgfortran.so.3 b/paimon-faiss-jni/src/main/resources/linux/amd64/libgfortran.so.3
new file mode 100755
index 000000000000..ea7d78c8381d
Binary files /dev/null and b/paimon-faiss-jni/src/main/resources/linux/amd64/libgfortran.so.3 differ
diff --git a/paimon-faiss-jni/src/main/resources/linux/amd64/libgomp.so.1 b/paimon-faiss-jni/src/main/resources/linux/amd64/libgomp.so.1
new file mode 100755
index 000000000000..7efd3dbd2950
Binary files /dev/null and b/paimon-faiss-jni/src/main/resources/linux/amd64/libgomp.so.1 differ
diff --git a/paimon-faiss-jni/src/main/resources/linux/amd64/libopenblas.so.0 b/paimon-faiss-jni/src/main/resources/linux/amd64/libopenblas.so.0
new file mode 100755
index 000000000000..58c53d24bdfd
Binary files /dev/null and b/paimon-faiss-jni/src/main/resources/linux/amd64/libopenblas.so.0 differ
diff --git a/paimon-faiss-jni/src/main/resources/linux/amd64/libpaimon_faiss_jni.so b/paimon-faiss-jni/src/main/resources/linux/amd64/libpaimon_faiss_jni.so
new file mode 100755
index 000000000000..fcede619b159
Binary files /dev/null and b/paimon-faiss-jni/src/main/resources/linux/amd64/libpaimon_faiss_jni.so differ
diff --git a/paimon-faiss-jni/src/main/resources/linux/amd64/libquadmath.so.0 b/paimon-faiss-jni/src/main/resources/linux/amd64/libquadmath.so.0
new file mode 100755
index 000000000000..bfefffdfe2c3
Binary files /dev/null and b/paimon-faiss-jni/src/main/resources/linux/amd64/libquadmath.so.0 differ
diff --git a/paimon-faiss-jni/src/main/resources/paimon-faiss-version.properties b/paimon-faiss-jni/src/main/resources/paimon-faiss-version.properties
new file mode 100644
index 000000000000..2ec78e60a4e9
--- /dev/null
+++ b/paimon-faiss-jni/src/main/resources/paimon-faiss-version.properties
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Paimon Faiss Version Information
+version=${project.version}
+faiss.version=1.7.4
+build.timestamp=${maven.build.timestamp}
+
diff --git a/paimon-faiss-jni/src/test/java/org/apache/paimon/faiss/IndexTest.java b/paimon-faiss-jni/src/test/java/org/apache/paimon/faiss/IndexTest.java
new file mode 100644
index 000000000000..72d07fafa20b
--- /dev/null
+++ b/paimon-faiss-jni/src/test/java/org/apache/paimon/faiss/IndexTest.java
@@ -0,0 +1,311 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+import java.io.File;
+import java.nio.file.Path;
+import java.util.Random;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+/**
+ * Tests for the Faiss Index class.
+ *
+ * <p>Note: These tests require the native library to be built and available. Nothing in this
+ * class skips them when the library is missing, so they are expected to fail in that case.
+ */
+class IndexTest {
+
+    private static final int DIMENSION = 128;
+    private static final int NUM_VECTORS = 1000;
+    private static final int K = 10;
+
+    /**
+     * Fixed-seed generator used by {@link #generateRandomVectors(int, int)}. Keeping one
+     * generator per test instance keeps the data reproducible while successive calls (data set
+     * first, query second) draw different values instead of restarting the same sequence.
+     */
+    private final Random random = new Random(42);
+
+    /** Flat L2 index: metadata, add, single-query search and sanity of the returned values. */
+    @Test
+    void testFlatIndexBasicOperations() {
+        try (Index index = IndexFactory.createFlat(DIMENSION, MetricType.L2)) {
+            assertEquals(DIMENSION, index.getDimension());
+            assertEquals(0, index.getCount());
+            assertTrue(index.isTrained());
+            assertEquals(MetricType.L2, index.getMetricType());
+
+            // Add vectors
+            float[] vectors = generateRandomVectors(NUM_VECTORS, DIMENSION);
+            index.add(vectors);
+            assertEquals(NUM_VECTORS, index.getCount());
+
+            // Search
+            float[] query = generateRandomVectors(1, DIMENSION);
+            SearchResult result = index.searchSingle(query, K);
+
+            assertEquals(1, result.getNumQueries());
+            assertEquals(K, result.getK());
+            assertEquals(K, result.getLabelsForQuery(0).length);
+            assertEquals(K, result.getDistancesForQuery(0).length);
+
+            // Verify labels are in valid range
+            for (long label : result.getLabels()) {
+                assertTrue(label >= 0 && label < NUM_VECTORS, "Label " + label + " out of range");
+            }
+
+            // Verify distances are non-negative for L2
+            for (float distance : result.getDistances()) {
+                assertTrue(distance >= 0, "Distance should be non-negative for L2");
+            }
+        }
+    }
+
+    /** Vectors added with explicit IDs must come back from search under those IDs. */
+    @Test
+    void testFlatIndexWithIds() {
+        try (Index index = IndexFactory.createFlatWithIds(DIMENSION, MetricType.L2)) {
+            float[] vectors = generateRandomVectors(NUM_VECTORS, DIMENSION);
+            long[] ids = new long[NUM_VECTORS];
+            for (int i = 0; i < NUM_VECTORS; i++) {
+                ids[i] = i * 100; // Use custom IDs
+            }
+
+            index.addWithIds(vectors, ids);
+            assertEquals(NUM_VECTORS, index.getCount());
+
+            // Search should return our custom IDs
+            float[] query = generateRandomVectors(1, DIMENSION);
+            SearchResult result = index.searchSingle(query, K);
+
+            for (long label : result.getLabels()) {
+                assertTrue(label % 100 == 0, "Label should be a multiple of 100");
+            }
+        }
+    }
+
+    /** Multi-query search returns numQueries * K results and consistent per-query slices. */
+    @Test
+    void testBatchSearch() {
+        try (Index index = IndexFactory.createFlat(DIMENSION)) {
+            float[] vectors = generateRandomVectors(NUM_VECTORS, DIMENSION);
+            index.add(vectors);
+
+            int numQueries = 5;
+            float[] queries = generateRandomVectors(numQueries, DIMENSION);
+            SearchResult result = index.search(queries, K);
+
+            assertEquals(numQueries, result.getNumQueries());
+            assertEquals(K, result.getK());
+            assertEquals(numQueries * K, result.getLabels().length);
+            assertEquals(numQueries * K, result.getDistances().length);
+
+            // Test per-query accessors
+            for (int q = 0; q < numQueries; q++) {
+                long[] labels = result.getLabelsForQuery(q);
+                float[] distances = result.getDistancesForQuery(q);
+                assertEquals(K, labels.length);
+                assertEquals(K, distances.length);
+            }
+        }
+    }
+
+    /** Inner-product index reports its metric and returns scores in descending order. */
+    @Test
+    void testInnerProductMetric() {
+        try (Index index = IndexFactory.createFlat(DIMENSION, MetricType.INNER_PRODUCT)) {
+            assertEquals(MetricType.INNER_PRODUCT, index.getMetricType());
+
+            float[] vectors = generateRandomVectors(NUM_VECTORS, DIMENSION);
+            index.add(vectors);
+
+            float[] query = generateRandomVectors(1, DIMENSION);
+            SearchResult result = index.searchSingle(query, K);
+
+            // For inner product, higher is better, so first result should have highest score
+            float[] distances = result.getDistancesForQuery(0);
+            for (int i = 1; i < K; i++) {
+                assertTrue(
+                        distances[i - 1] >= distances[i],
+                        "Distances should be sorted in descending order for inner product");
+            }
+        }
+    }
+
+    /** reset() empties the index and leaves it usable. */
+    @Test
+    void testIndexReset() {
+        try (Index index = IndexFactory.createFlat(DIMENSION)) {
+            float[] vectors = generateRandomVectors(100, DIMENSION);
+            index.add(vectors);
+            assertEquals(100, index.getCount());
+
+            index.reset();
+            assertEquals(0, index.getCount());
+
+            // Can add again after reset
+            index.add(vectors);
+            assertEquals(100, index.getCount());
+        }
+    }
+
+    /** File and byte-array round trips must preserve dimension, count and search results. */
+    @Test
+    void testIndexSerialization(@TempDir Path tempDir) {
+        float[] vectors = generateRandomVectors(NUM_VECTORS, DIMENSION);
+        float[] query = generateRandomVectors(1, DIMENSION);
+        SearchResult originalResult;
+
+        // Create and populate index
+        try (Index index = IndexFactory.createFlat(DIMENSION)) {
+            index.add(vectors);
+            originalResult = index.searchSingle(query, K);
+
+            // Test file I/O
+            File indexFile = tempDir.resolve("test.index").toFile();
+            index.writeToFile(indexFile);
+
+            try (Index loadedIndex = Index.readFromFile(indexFile)) {
+                assertEquals(DIMENSION, loadedIndex.getDimension());
+                assertEquals(NUM_VECTORS, loadedIndex.getCount());
+
+                SearchResult loadedResult = loadedIndex.searchSingle(query, K);
+                assertArrayEquals(originalResult.getLabels(), loadedResult.getLabels());
+            }
+        }
+
+        // Test byte array serialization
+        try (Index index = IndexFactory.createFlat(DIMENSION)) {
+            index.add(vectors);
+            byte[] serialized = index.serialize();
+            assertNotNull(serialized);
+            assertTrue(serialized.length > 0);
+
+            try (Index deserializedIndex = Index.deserialize(serialized)) {
+                assertEquals(DIMENSION, deserializedIndex.getDimension());
+                assertEquals(NUM_VECTORS, deserializedIndex.getCount());
+
+                SearchResult deserializedResult = deserializedIndex.searchSingle(query, K);
+                assertArrayEquals(originalResult.getLabels(), deserializedResult.getLabels());
+            }
+        }
+    }
+
+    /** A handful of factory description strings must each yield a usable index. */
+    @Test
+    void testIndexFactoryDescriptions() {
+        // Test various index factory strings
+        String[] descriptions = {"Flat", "IDMap,Flat", "HNSW32", "HNSW32,Flat"};
+
+        for (String desc : descriptions) {
+            try (Index index = IndexFactory.create(DIMENSION, desc, MetricType.L2)) {
+                assertEquals(DIMENSION, index.getDimension());
+                assertNotNull(index.toString());
+            }
+        }
+    }
+
+    /** HNSW index: no training needed, efSearch is readable/writable, search returns K labels. */
+    @Test
+    void testHNSWIndex() {
+        try (Index index = IndexFactory.createHNSW(DIMENSION, 32, MetricType.L2)) {
+            assertTrue(index.isTrained()); // HNSW doesn't need training
+
+            float[] vectors = generateRandomVectors(NUM_VECTORS, DIMENSION);
+            index.add(vectors);
+
+            // Get and set efSearch
+            int efSearch = IndexHNSW.getEfSearch(index);
+            assertTrue(efSearch > 0);
+
+            IndexHNSW.setEfSearch(index, 64);
+            assertEquals(64, IndexHNSW.getEfSearch(index));
+
+            // Search
+            float[] query = generateRandomVectors(1, DIMENSION);
+            SearchResult result = index.searchSingle(query, K);
+            assertEquals(K, result.getLabels().length);
+        }
+    }
+
+    /** Invalid arguments and use-after-close must raise the documented exception types. */
+    @Test
+    void testErrorHandling() {
+        // Test invalid dimension
+        assertThrows(
+                IllegalArgumentException.class,
+                () -> {
+                    IndexFactory.create(0, "Flat", MetricType.L2);
+                });
+
+        assertThrows(
+                IllegalArgumentException.class,
+                () -> {
+                    IndexFactory.create(-1, "Flat", MetricType.L2);
+                });
+
+        // Test null description
+        assertThrows(
+                IllegalArgumentException.class,
+                () -> {
+                    IndexFactory.create(DIMENSION, null, MetricType.L2);
+                });
+
+        // Test vector dimension mismatch
+        try (Index index = IndexFactory.createFlat(DIMENSION)) {
+            float[] wrongDimVectors = new float[10]; // Wrong size
+            assertThrows(
+                    IllegalArgumentException.class,
+                    () -> {
+                        index.addSingle(wrongDimVectors);
+                    });
+        }
+
+        // Test closed index
+        Index closedIndex = IndexFactory.createFlat(DIMENSION);
+        closedIndex.close();
+        assertThrows(
+                IllegalStateException.class,
+                () -> {
+                    closedIndex.getCount();
+                });
+    }
+
+    /** Element-wise accessors of SearchResult, including out-of-range arguments. */
+    @Test
+    void testSearchResultAccessors() {
+        try (Index index = IndexFactory.createFlat(DIMENSION)) {
+            float[] vectors = generateRandomVectors(100, DIMENSION);
+            index.add(vectors);
+
+            float[] queries = generateRandomVectors(3, DIMENSION);
+            SearchResult result = index.search(queries, 5);
+
+            // Test individual accessors
+            for (int q = 0; q < 3; q++) {
+                for (int n = 0; n < 5; n++) {
+                    long label = result.getLabel(q, n);
+                    float distance = result.getDistance(q, n);
+                    assertTrue(label >= 0 && label < 100);
+                    assertTrue(distance >= 0);
+                }
+            }
+
+            // Test out of bounds
+            assertThrows(
+                    IndexOutOfBoundsException.class,
+                    () -> {
+                        result.getLabel(10, 0);
+                    });
+            assertThrows(
+                    IndexOutOfBoundsException.class,
+                    () -> {
+                        result.getLabel(0, 10);
+                    });
+        }
+    }
+
+    /**
+     * Returns {@code n * d} pseudo-random floats in {@code [0, 1)}, laid out as {@code n}
+     * consecutive vectors of dimension {@code d}.
+     *
+     * <p>Values are drawn from the shared fixed-seed {@link #random}, so a query generated after
+     * the data set is not a copy of the first indexed vector.
+     */
+    private float[] generateRandomVectors(int n, int d) {
+        float[] vectors = new float[n * d];
+        for (int i = 0; i < vectors.length; i++) {
+            vectors[i] = random.nextFloat();
+        }
+        return vectors;
+    }
+}
+
diff --git a/paimon-faiss/pom.xml b/paimon-faiss/pom.xml
new file mode 100644
index 000000000000..6e0d6abf2307
--- /dev/null
+++ b/paimon-faiss/pom.xml
@@ -0,0 +1,104 @@
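+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <parent>
+        <artifactId>paimon-parent</artifactId>
+        <groupId>org.apache.paimon</groupId>
+        <version>1.4-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>paimon-faiss</artifactId>
+    <name>Paimon : Faiss</name>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.paimon</groupId>
+            <artifactId>paimon-common</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.paimon</groupId>
+            <artifactId>paimon-faiss-jni</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+
+        <!-- Test dependencies -->
+
+        <dependency>
+            <groupId>org.apache.paimon</groupId>
+            <artifactId>paimon-core</artifactId>
+            <version>${project.version}</version>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.paimon</groupId>
+            <artifactId>paimon-format</artifactId>
+            <version>${project.version}</version>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.paimon</groupId>
+            <artifactId>paimon-test-utils</artifactId>
+            <version>${project.version}</version>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-client</artifactId>
+            <version>${hadoop.version}</version>
+            <scope>test</scope>
+            <exclusions>
+                <exclusion>
+                    <groupId>log4j</groupId>
+                    <artifactId>log4j</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.slf4j</groupId>
+                    <artifactId>slf4j-log4j12</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <artifactId>maven-jar-plugin</artifactId>
+                <executions>
+                    <execution>
+                        <goals>
+                            <goal>test-jar</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>
+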
+
diff --git a/paimon-faiss/src/main/java/org/apache/paimon/faiss/index/FaissIndex.java b/paimon-faiss/src/main/java/org/apache/paimon/faiss/index/FaissIndex.java
new file mode 100644
index 000000000000..792095732b0e
--- /dev/null
+++ b/paimon-faiss/src/main/java/org/apache/paimon/faiss/index/FaissIndex.java
@@ -0,0 +1,426 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss.index;
+
+import org.apache.paimon.faiss.Index;
+import org.apache.paimon.faiss.IndexFactory;
+import org.apache.paimon.faiss.IndexHNSW;
+import org.apache.paimon.faiss.IndexIVF;
+import org.apache.paimon.faiss.MetricType;
+
+import java.io.Closeable;
+
+/**
+ * A wrapper class for FAISS index that manages the native index pointer.
+ *
+ *
This class provides a safe Java API for interacting with native FAISS indices, including
+ * automatic resource management through the {@link Closeable} interface.
+ *
+ *
This implementation uses the paimon-faiss-jni library for native FAISS bindings.
+ */
+public class FaissIndex implements Closeable {
+
+ private final Index index;
+ private final int dimension;
+ private final FaissVectorMetric metric;
+ private final FaissIndexType indexType;
+ private volatile boolean closed = false;
+
+ private FaissIndex(
+ Index index, int dimension, FaissVectorMetric metric, FaissIndexType indexType) {
+ this.index = index;
+ this.dimension = dimension;
+ this.metric = metric;
+ this.indexType = indexType;
+ }
+
+ /**
+ * Create a flat index (exact search).
+ *
+ * @param dimension the dimension of vectors
+ * @param metric the distance metric
+ * @return the created index
+ */
+ public static FaissIndex createFlatIndex(int dimension, FaissVectorMetric metric) {
+ MetricType metricType = toMetricType(metric);
+ Index index = IndexFactory.create(dimension, "IDMap,Flat", metricType);
+ return new FaissIndex(index, dimension, metric, FaissIndexType.FLAT);
+ }
+
+ /**
+ * Create an HNSW index.
+ *
+ * @param dimension the dimension of vectors
+ * @param m the number of connections per layer
+ * @param efConstruction the size of the dynamic candidate list for construction
+ * @param metric the distance metric
+ * @return the created index
+ */
+ public static FaissIndex createHnswIndex(
+ int dimension, int m, int efConstruction, FaissVectorMetric metric) {
+ MetricType metricType = toMetricType(metric);
+ // Use IDMap2 wrapper to support addWithIds and get efConstruction
+ String description = String.format("IDMap2,HNSW%d", m);
+ Index index = IndexFactory.create(dimension, description, metricType);
+ return new FaissIndex(index, dimension, metric, FaissIndexType.HNSW);
+ }
+
+ /**
+ * Create an IVF index.
+ *
+ * @param dimension the dimension of vectors
+ * @param nlist the number of inverted lists (clusters)
+ * @param metric the distance metric
+ * @return the created index
+ */
+ public static FaissIndex createIvfIndex(int dimension, int nlist, FaissVectorMetric metric) {
+ MetricType metricType = toMetricType(metric);
+ String description = String.format("IDMap,IVF%d,Flat", nlist);
+ Index index = IndexFactory.create(dimension, description, metricType);
+ return new FaissIndex(index, dimension, metric, FaissIndexType.IVF);
+ }
+
+ /**
+ * Create an IVF-PQ index.
+ *
+ * @param dimension the dimension of vectors
+ * @param nlist the number of inverted lists (clusters)
+ * @param m the number of sub-quantizers
+ * @param nbits the number of bits per sub-quantizer
+ * @param metric the distance metric
+ * @return the created index
+ */
+ public static FaissIndex createIvfPqIndex(
+ int dimension, int nlist, int m, int nbits, FaissVectorMetric metric) {
+ MetricType metricType = toMetricType(metric);
+ String description = String.format("IDMap,IVF%d,PQ%dx%d", nlist, m, nbits);
+ Index index = IndexFactory.create(dimension, description, metricType);
+ return new FaissIndex(index, dimension, metric, FaissIndexType.IVF_PQ);
+ }
+
+ /**
+ * Load an index from serialized data.
+ *
+ * @param data the serialized index data
+ * @return the loaded index
+ */
+ public static FaissIndex fromBytes(byte[] data) {
+ Index index = Index.deserialize(data);
+ int dimension = index.getDimension();
+ // Note: metric and type are not stored in serialized form, use defaults
+ return new FaissIndex(index, dimension, FaissVectorMetric.L2, FaissIndexType.UNKNOWN);
+ }
+
+ /**
+ * Add vectors to the index.
+ *
+ * @param vectors the vectors to add (each row is a vector)
+ */
+ public void add(float[][] vectors) {
+ ensureOpen();
+ if (vectors.length == 0) {
+ return;
+ }
+ float[] flattened = flatten(vectors);
+ index.add(flattened);
+ }
+
+ /**
+ * Add vectors with IDs to the index.
+ *
+ * @param vectors the vectors to add (each row is a vector)
+ * @param ids the IDs for the vectors
+ */
+ public void addWithIds(float[][] vectors, long[] ids) {
+ ensureOpen();
+ if (vectors.length == 0) {
+ return;
+ }
+ if (vectors.length != ids.length) {
+ throw new IllegalArgumentException(
+ "Number of vectors and IDs must match: "
+ + vectors.length
+ + " vs "
+ + ids.length);
+ }
+ float[] flattened = flatten(vectors);
+ index.addWithIds(flattened, ids);
+ }
+
+ /**
+ * Add a single vector to the index.
+ *
+ * @param vector the vector to add
+ */
+ public void add(float[] vector) {
+ ensureOpen();
+ checkDimension(vector);
+ index.addSingle(vector);
+ }
+
+ /**
+ * Add a single vector with ID to the index.
+ *
+ * @param vector the vector to add
+ * @param id the ID for the vector
+ */
+ public void addWithId(float[] vector, long id) {
+ ensureOpen();
+ checkDimension(vector);
+ index.addWithIds(vector, new long[] {id});
+ }
+
+ /**
+ * Train the index (required for IVF-based indices).
+ *
+ * @param trainingVectors the training vectors
+ */
+ public void train(float[][] trainingVectors) {
+ ensureOpen();
+ if (trainingVectors.length == 0) {
+ return;
+ }
+ float[] flattened = flatten(trainingVectors);
+ index.train(flattened);
+ }
+
+ /**
+ * Check if the index is trained.
+ *
+ * @return true if the index is trained
+ */
+ public boolean isTrained() {
+ ensureOpen();
+ return index.isTrained();
+ }
+
+ /**
+ * Search for k nearest neighbors.
+ *
+ * @param queries the query vectors
+ * @param k the number of nearest neighbors to return
+ * @return search results containing distances and IDs
+ */
+ public SearchResult search(float[][] queries, int k) {
+ ensureOpen();
+ if (queries.length == 0) {
+ return new SearchResult(new float[0], new long[0], 0, k);
+ }
+ float[] flattened = flatten(queries);
+ org.apache.paimon.faiss.SearchResult result = index.search(flattened, k);
+ return new SearchResult(result.getDistances(), result.getLabels(), queries.length, k);
+ }
+
+ /**
+ * Search for k nearest neighbors for a single query.
+ *
+ * @param query the query vector
+ * @param k the number of nearest neighbors to return
+ * @return search results containing distances and IDs
+ */
+ public SearchResult search(float[] query, int k) {
+ ensureOpen();
+ checkDimension(query);
+ org.apache.paimon.faiss.SearchResult result = index.searchSingle(query, k);
+ return new SearchResult(result.getDistances(), result.getLabels(), 1, k);
+ }
+
+ /**
+ * Set HNSW search parameter efSearch.
+ *
+ * @param efSearch the size of the dynamic candidate list for search
+ */
+ public void setHnswEfSearch(int efSearch) {
+ ensureOpen();
+ IndexHNSW.setEfSearch(index, efSearch);
+ }
+
+ /**
+ * Set IVF search parameter nprobe.
+ *
+ * @param nprobe the number of clusters to visit during search
+ */
+ public void setIvfNprobe(int nprobe) {
+ ensureOpen();
+ IndexIVF.setNprobe(index, nprobe);
+ }
+
+ /**
+ * Get the number of vectors in the index.
+ *
+ * @return the number of vectors
+ */
+ public long size() {
+ ensureOpen();
+ return index.getCount();
+ }
+
+ /**
+ * Get the dimension of vectors in the index.
+ *
+ * @return the dimension
+ */
+ public int dimension() {
+ return dimension;
+ }
+
+ /**
+ * Get the metric used by this index.
+ *
+ * @return the metric
+ */
+ public FaissVectorMetric metric() {
+ return metric;
+ }
+
+ /**
+ * Get the type of this index.
+ *
+ * @return the index type
+ */
+ public FaissIndexType indexType() {
+ return indexType;
+ }
+
+ /**
+ * Serialize the index to a byte array.
+ *
+ * @return the serialized index
+ */
+ public byte[] toBytes() {
+ ensureOpen();
+ return index.serialize();
+ }
+
+ /** Reset the index (remove all vectors). */
+ public void reset() {
+ ensureOpen();
+ index.reset();
+ }
+
+ @Override
+ public void close() {
+ if (!closed) {
+ synchronized (this) {
+ if (!closed) {
+ index.close();
+ closed = true;
+ }
+ }
+ }
+ }
+
+ private void ensureOpen() {
+ if (closed) {
+ throw new IllegalStateException("Index has been closed");
+ }
+ }
+
+ private void checkDimension(float[] vector) {
+ if (vector.length != dimension) {
+ throw new IllegalArgumentException(
+ "Vector dimension mismatch: expected " + dimension + ", got " + vector.length);
+ }
+ }
+
+ private float[] flatten(float[][] vectors) {
+ int n = vectors.length;
+ int d = vectors[0].length;
+ float[] result = new float[n * d];
+ for (int i = 0; i < n; i++) {
+ if (vectors[i].length != d) {
+ throw new IllegalArgumentException(
+ "All vectors must have the same dimension: expected "
+ + d
+ + ", got "
+ + vectors[i].length
+ + " at index "
+ + i);
+ }
+ System.arraycopy(vectors[i], 0, result, i * d, d);
+ }
+ return result;
+ }
+
+ private static MetricType toMetricType(FaissVectorMetric metric) {
+ switch (metric) {
+ case L2:
+ return MetricType.L2;
+ case INNER_PRODUCT:
+ return MetricType.INNER_PRODUCT;
+ default:
+ throw new IllegalArgumentException("Unknown metric: " + metric);
+ }
+ }
+
+ /** Result of a search operation. */
+ public static class SearchResult {
+ private final float[] distances;
+ private final long[] labels;
+ private final int numQueries;
+ private final int k;
+
+ public SearchResult(float[] distances, long[] labels, int numQueries, int k) {
+ this.distances = distances;
+ this.labels = labels;
+ this.numQueries = numQueries;
+ this.k = k;
+ }
+
+ public float[] getDistances() {
+ return distances;
+ }
+
+ public long[] getLabels() {
+ return labels;
+ }
+
+ public int getNumQueries() {
+ return numQueries;
+ }
+
+ public int getK() {
+ return k;
+ }
+
+ /**
+ * Get distances for a specific query.
+ *
+ * @param queryIndex the query index
+ * @return the distances for that query
+ */
+ public float[] getDistancesForQuery(int queryIndex) {
+ float[] result = new float[k];
+ System.arraycopy(distances, queryIndex * k, result, 0, k);
+ return result;
+ }
+
+ /**
+ * Get labels for a specific query.
+ *
+ * @param queryIndex the query index
+ * @return the labels for that query
+ */
+ public long[] getLabelsForQuery(int queryIndex) {
+ long[] result = new long[k];
+ System.arraycopy(labels, queryIndex * k, result, 0, k);
+ return result;
+ }
+ }
+}
diff --git a/paimon-faiss/src/main/java/org/apache/paimon/faiss/index/FaissIndexType.java b/paimon-faiss/src/main/java/org/apache/paimon/faiss/index/FaissIndexType.java
new file mode 100644
index 000000000000..3756d622948d
--- /dev/null
+++ b/paimon-faiss/src/main/java/org/apache/paimon/faiss/index/FaissIndexType.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss.index;
+
+/** Enumeration of supported FAISS index types. */
+public enum FaissIndexType {
+
+ /** Flat index - exact brute-force search. */
+ FLAT("Flat"),
+
+ /** HNSW (Hierarchical Navigable Small World) graph-based index. */
+ HNSW("HNSW"),
+
+ /** IVF (Inverted File) index with flat vectors. */
+ IVF("IVF"),
+
+ /** IVF-PQ (Inverted File with Product Quantization) index. */
+ IVF_PQ("IVF_PQ"),
+
+ /** Unknown index type (e.g., loaded from serialized data). */
+ UNKNOWN("Unknown");
+
+ private final String name;
+
+ FaissIndexType(String name) {
+ this.name = name;
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public static FaissIndexType fromString(String name) {
+ for (FaissIndexType type : values()) {
+ if (type.name.equalsIgnoreCase(name)) {
+ return type;
+ }
+ }
+ throw new IllegalArgumentException("Unknown index type: " + name);
+ }
+}
diff --git a/paimon-faiss/src/main/java/org/apache/paimon/faiss/index/FaissVectorGlobalIndexReader.java b/paimon-faiss/src/main/java/org/apache/paimon/faiss/index/FaissVectorGlobalIndexReader.java
new file mode 100644
index 000000000000..c9eae4d93c90
--- /dev/null
+++ b/paimon-faiss/src/main/java/org/apache/paimon/faiss/index/FaissVectorGlobalIndexReader.java
@@ -0,0 +1,342 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss.index;
+
+import org.apache.paimon.fs.SeekableInputStream;
+import org.apache.paimon.globalindex.GlobalIndexIOMeta;
+import org.apache.paimon.globalindex.GlobalIndexReader;
+import org.apache.paimon.globalindex.GlobalIndexResult;
+import org.apache.paimon.globalindex.io.GlobalIndexFileReader;
+import org.apache.paimon.predicate.FieldRef;
+import org.apache.paimon.predicate.VectorSearch;
+import org.apache.paimon.types.ArrayType;
+import org.apache.paimon.types.DataType;
+import org.apache.paimon.types.FloatType;
+import org.apache.paimon.utils.IOUtils;
+import org.apache.paimon.utils.RoaringNavigableMap64;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Optional;
+import java.util.PriorityQueue;
+
+/**
+ * Vector global index reader using FAISS.
+ *
+ *
+ * <p>This implementation uses FAISS for efficient approximate nearest neighbor search.
+ */
+public class FaissVectorGlobalIndexReader implements GlobalIndexReader {
+
+ /** File format version accepted by {@code loadIndex}. */
+ private static final int VERSION = 1;
+
+ /** Indices loaded so far; filled by {@code ensureLoadIndices} and emptied by {@code close}. */
+ private final List<FaissIndex> indices;
+ /** Metadata of the index files to load; only the file name is read in this class. */
+ private final List<GlobalIndexIOMeta> ioMetas;
+ private final GlobalIndexFileReader fileReader;
+ private final DataType fieldType;
+ private final FaissVectorIndexOptions options;
+ /** Set to true once every file in {@code ioMetas} has been loaded. */
+ private volatile boolean indicesLoaded = false;
+
+ /**
+  * Creates a reader. No index file is opened here; loading is deferred until the first vector
+  * search.
+  *
+  * @param fileReader used to open the index files by name
+  * @param ioMetas metadata of the index files backing this reader
+  * @param fieldType type of the indexed field
+  * @param options FAISS options (metric, efSearch, nprobe) consulted at search time
+  */
+ public FaissVectorGlobalIndexReader(
+         GlobalIndexFileReader fileReader,
+         List<GlobalIndexIOMeta> ioMetas,
+         DataType fieldType,
+         FaissVectorIndexOptions options) {
+     this.fileReader = fileReader;
+     this.ioMetas = ioMetas;
+     this.fieldType = fieldType;
+     this.options = options;
+     this.indices = new ArrayList<>();
+ }
+
+ @Override
+ public Optional visitVectorSearch(VectorSearch vectorSearch) {
+ try {
+ ensureLoadIndices();
+ return Optional.ofNullable(search(vectorSearch));
+ } catch (IOException e) {
+ throw new RuntimeException(
+ String.format(
+ "Failed to search FAISS vector index with fieldName=%s, limit=%d",
+ vectorSearch.fieldName(), vectorSearch.limit()),
+ e);
+ }
+ }
+
+ /**
+  * Runs the query against every loaded index and merges the hits into one top-{@code limit}
+  * result.
+  *
+  * <p>Each index is asked for up to {@code searchK} neighbours, capped by the index size. Hits
+  * with a negative label, or whose row id is missing from {@code includeRowIds} when that filter
+  * is set, are dropped. Distances are mapped to scores by {@code convertDistanceToScore} and
+  * only the {@code limit} highest scores are retained.
+  *
+  * @param vectorSearch the query; its vector must be a {@code float[]}
+  * @return the retained row ids together with their scores
+  * @throws IOException propagated from the underlying index search, if any
+  */
+ private GlobalIndexResult search(VectorSearch vectorSearch) throws IOException {
+     validateVectorType(vectorSearch.vector());
+     float[] queryVector = (float[]) vectorSearch.vector();
+     int limit = vectorSearch.limit();
+
+     // Min-heap on score: the head is always the weakest hit currently retained.
+     PriorityQueue<ScoredRow> result =
+             new PriorityQueue<>(Comparator.comparingDouble(sr -> sr.score));
+
+     RoaringNavigableMap64 includeRowIds = vectorSearch.includeRowIds();
+
+     // When filtering is enabled we over-fetch so that enough hits survive the filter: at least
+     // 10x the limit, or as many as there are candidate row ids if that is larger.
+     // Computed as long because both limit * 10 and the bitmap cardinality can exceed
+     // Integer.MAX_VALUE; an int overflow here would turn searchK negative and skip every index.
+     long searchK = limit;
+     if (includeRowIds != null) {
+         searchK = Math.max(limit * 10L, includeRowIds.getLongCardinality());
+     }
+
+     for (FaissIndex index : indices) {
+         // Configure search parameters based on index type
+         configureSearchParams(index);
+
+         // Limit searchK to the index size, and to the int range expected by the search call
+         int effectiveK =
+                 (int) Math.min(Math.min(searchK, index.size()), (long) Integer.MAX_VALUE);
+         if (effectiveK <= 0) {
+             continue;
+         }
+
+         FaissIndex.SearchResult searchResult = index.search(queryVector, effectiveK);
+         float[] distances = searchResult.getDistancesForQuery(0);
+         long[] labels = searchResult.getLabelsForQuery(0);
+
+         for (int i = 0; i < effectiveK; i++) {
+             long rowId = labels[i];
+             if (rowId < 0) {
+                 // Invalid result (not enough neighbors)
+                 continue;
+             }
+
+             // Filter by include row IDs if specified
+             if (includeRowIds != null && !includeRowIds.contains(rowId)) {
+                 continue;
+             }
+
+             // Convert distance to score (higher is better for similarity)
+             float score = convertDistanceToScore(distances[i]);
+
+             if (result.size() < limit) {
+                 result.offer(new ScoredRow(rowId, score));
+             } else {
+                 // Heap is full: replace the weakest retained hit only if this one beats it.
+                 ScoredRow weakest = result.peek();
+                 if (weakest != null && score > weakest.score) {
+                     result.poll();
+                     result.offer(new ScoredRow(rowId, score));
+                 }
+             }
+         }
+     }
+
+     RoaringNavigableMap64 roaringBitmap64 = new RoaringNavigableMap64();
+     HashMap<Long, Float> id2scores = new HashMap<>(result.size());
+     for (ScoredRow scoredRow : result) {
+         id2scores.put(scoredRow.rowId, scoredRow.score);
+         roaringBitmap64.add(scoredRow.rowId);
+     }
+     return new FaissVectorSearchGlobalIndexResult(roaringBitmap64, id2scores);
+ }
+
+ /**
+  * Applies the search-time option that matches the index type: {@code efSearch} for HNSW,
+  * {@code nprobe} for IVF and IVF_PQ. Any other type is left untouched.
+  *
+  * @param index the index about to be searched
+  */
+ private void configureSearchParams(FaissIndex index) {
+     switch (index.indexType()) {
+         case IVF:
+         case IVF_PQ:
+             index.setIvfNprobe(options.nprobe());
+             return;
+         case HNSW:
+             index.setHnswEfSearch(options.efSearch());
+             return;
+         default:
+             // No special configuration needed
+             return;
+     }
+ }
+
+ /**
+  * Turns a raw distance into a score where larger means more similar.
+  *
+  * <p>With the L2 metric the distance is mapped to {@code 1 / (1 + distance)}; with any other
+  * metric the value is returned unchanged.
+  *
+  * @param distance the distance reported by the index
+  * @return the score used for ranking
+  */
+ private float convertDistanceToScore(float distance) {
+     if (options.metric() != FaissVectorMetric.L2) {
+         // Inner product is already a similarity
+         return distance;
+     }
+     // For L2 distance, smaller is better, so we invert it
+     return 1.0f / (1.0f + distance);
+ }
+
+ /**
+  * Checks that the query vector is a {@code float[]} and that the indexed field is an array of
+  * floats.
+  *
+  * @param vector the query vector; {@code null} is rejected like any other non-float[] value
+  * @throws IllegalArgumentException if the vector is not a {@code float[]} or the field type is
+  *     not an {@link ArrayType} with a {@link FloatType} element
+  */
+ private void validateVectorType(Object vector) {
+     if (!(vector instanceof float[])) {
+         // instanceof is false for null, so guard before calling getClass()
+         String actual = vector == null ? "null" : String.valueOf(vector.getClass());
+         throw new IllegalArgumentException("Expected float[] vector but got: " + actual);
+     }
+     if (!(fieldType instanceof ArrayType)
+             || !(((ArrayType) fieldType).getElementType() instanceof FloatType)) {
+         throw new IllegalArgumentException(
+                 "FAISS currently only supports float arrays, but field type is: " + fieldType);
+     }
+ }
+
+ /**
+  * Loads every index file listed in {@code ioMetas} on first use.
+  *
+  * <p>The {@code indicesLoaded} flag is checked before and after taking the monitor. Indices are
+  * collected in a local list and only published to {@code indices} once every file was read.
+  * If any file fails, the indices loaded so far are closed and nothing is published, so a
+  * later retry starts from a clean state instead of appending duplicates or keeping an
+  * already-closed index in the list.
+  *
+  * @throws IOException if opening or reading an index file fails
+  */
+ private void ensureLoadIndices() throws IOException {
+     if (indicesLoaded) {
+         return;
+     }
+     synchronized (this) {
+         if (indicesLoaded) {
+             return;
+         }
+         List<FaissIndex> loaded = new ArrayList<>(ioMetas.size());
+         try {
+             for (GlobalIndexIOMeta meta : ioMetas) {
+                 try (SeekableInputStream in = fileReader.getInputStream(meta.fileName())) {
+                     // Track the index before the stream is closed so that a failing
+                     // close() still releases it in the catch block below.
+                     loaded.add(loadIndex(in));
+                 }
+             }
+         } catch (Exception e) {
+             for (FaissIndex index : loaded) {
+                 IOUtils.closeQuietly(index);
+             }
+             throw e;
+         }
+         indices.addAll(loaded);
+         indicesLoaded = true;
+     }
+ }
+
+ /**
+  * Reads one serialized index from the stream.
+  *
+  * <p>Layout read here: version (int), dimension (int), metric (int), index type ordinal (int),
+  * number of vectors (long), payload length (int), then the payload bytes handed to
+  * {@code FaissIndex.fromBytes}.
+  *
+  * @param in stream positioned at the start of the index file
+  * @return the deserialized index
+  * @throws IOException if the version is unsupported, the payload length is negative, or the
+  *     stream ends early
+  */
+ private FaissIndex loadIndex(SeekableInputStream in) throws IOException {
+     DataInputStream dataIn = new DataInputStream(in);
+     int version = dataIn.readInt();
+     if (version != VERSION) {
+         throw new IOException("Unsupported FAISS index version: " + version);
+     }
+
+     // The next four header fields are read only to advance the stream.
+     // NOTE(review): the stored index type is not passed on; confirm that
+     // FaissIndex.fromBytes recovers it, since configureSearchParams switches on it.
+     dataIn.readInt(); // dimension
+     dataIn.readInt(); // metric
+     dataIn.readInt(); // index type ordinal
+     dataIn.readLong(); // number of vectors
+
+     int indexDataLength = dataIn.readInt();
+     if (indexDataLength < 0) {
+         // A corrupt header would otherwise surface as NegativeArraySizeException.
+         throw new IOException("Invalid FAISS index data length: " + indexDataLength);
+     }
+
+     byte[] indexData = new byte[indexDataLength];
+     dataIn.readFully(indexData);
+
+     return FaissIndex.fromBytes(indexData);
+ }
+
+ /**
+  * Closes every loaded index and empties the list.
+  *
+  * <p>All indices are closed even if some fail. The first failure is rethrown after the loop,
+  * with later failures attached as suppressed exceptions.
+  *
+  * @throws IOException if the first failure was an {@link IOException}
+  * @throws RuntimeException if the first failure was a runtime exception, or wrapping it when
+  *     it was any other {@link Throwable}
+  */
+ @Override
+ public void close() throws IOException {
+     Throwable failure = null;
+
+     for (FaissIndex loaded : indices) {
+         try {
+             loaded.close();
+         } catch (Throwable t) {
+             if (failure != null) {
+                 failure.addSuppressed(t);
+             } else {
+                 failure = t;
+             }
+         }
+     }
+     indices.clear();
+
+     if (failure == null) {
+         return;
+     }
+     if (failure instanceof IOException) {
+         throw (IOException) failure;
+     }
+     if (failure instanceof RuntimeException) {
+         throw (RuntimeException) failure;
+     }
+     throw new RuntimeException("Failed to close FAISS vector global index reader", failure);
+ }
+
+ /** Immutable pair of a row id and the score computed for it. */
+ private static class ScoredRow {
+     final long rowId;
+     final float score;
+
+     ScoredRow(long id, float value) {
+         rowId = id;
+         score = value;
+     }
+ }
+
+ // =================== unsupported =====================
+
+ @Override
+ public Optional visitIsNotNull(FieldRef fieldRef) {
+ return Optional.empty(); // unsupported predicate: always an empty Optional
+ }
+
+ @Override
+ public Optional visitIsNull(FieldRef fieldRef) {
+ return Optional.empty(); // unsupported predicate: always an empty Optional
+ }
+
+ @Override
+ public Optional visitStartsWith(FieldRef fieldRef, Object literal) {
+ return Optional.empty(); // unsupported predicate: always an empty Optional
+ }
+
+ @Override
+ public Optional visitEndsWith(FieldRef fieldRef, Object literal) {
+ return Optional.empty(); // unsupported predicate: always an empty Optional
+ }
+
+ @Override
+ public Optional visitContains(FieldRef fieldRef, Object literal) {
+ return Optional.empty(); // unsupported predicate: always an empty Optional
+ }
+
+ @Override
+ public Optional visitLike(FieldRef fieldRef, Object literal) {
+ return Optional.empty(); // unsupported predicate: always an empty Optional
+ }
+
+ @Override
+ public Optional visitLessThan(FieldRef fieldRef, Object literal) {
+ return Optional.empty(); // unsupported predicate: always an empty Optional
+ }
+
+ @Override
+ public Optional visitGreaterOrEqual(FieldRef fieldRef, Object literal) {
+ return Optional.empty(); // unsupported predicate: always an empty Optional
+ }
+
+ @Override
+ public Optional visitNotEqual(FieldRef fieldRef, Object literal) {
+ return Optional.empty(); // unsupported predicate: always an empty Optional
+ }
+
+ @Override
+ public Optional visitLessOrEqual(FieldRef fieldRef, Object literal) {
+ return Optional.empty(); // unsupported predicate: always an empty Optional
+ }
+
+ @Override
+ public Optional visitEqual(FieldRef fieldRef, Object literal) {
+ return Optional.empty(); // unsupported predicate: always an empty Optional
+ }
+
+ @Override
+ public Optional visitGreaterThan(FieldRef fieldRef, Object literal) {
+ return Optional.empty(); // unsupported predicate: always an empty Optional
+ }
+
+ @Override
+ public Optional visitIn(FieldRef fieldRef, List