From 832c1c53b065747fb884e8a527e952587db8e755 Mon Sep 17 00:00:00 2001 From: Peng Lu Date: Sun, 7 Dec 2025 23:19:32 +0800 Subject: [PATCH 1/2] [arrow] Fix incorrect index usage in ArrowFieldWriters.TimeWriter --- .../arrow/writer/ArrowFieldWriters.java | 2 +- .../arrow/vector/ArrowFormatWriterTest.java | 54 +++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/paimon-arrow/src/main/java/org/apache/paimon/arrow/writer/ArrowFieldWriters.java b/paimon-arrow/src/main/java/org/apache/paimon/arrow/writer/ArrowFieldWriters.java index 9e4f371a793d..1762786db007 100644 --- a/paimon-arrow/src/main/java/org/apache/paimon/arrow/writer/ArrowFieldWriters.java +++ b/paimon-arrow/src/main/java/org/apache/paimon/arrow/writer/ArrowFieldWriters.java @@ -454,7 +454,7 @@ protected void doWrite( if (columnVector.isNullAt(row)) { timeMilliVector.setNull(i); } else { - int value = ((IntColumnVector) columnVector).getInt(i); + int value = ((IntColumnVector) columnVector).getInt(row); timeMilliVector.setSafe(i, value); } } diff --git a/paimon-arrow/src/test/java/org/apache/paimon/arrow/vector/ArrowFormatWriterTest.java b/paimon-arrow/src/test/java/org/apache/paimon/arrow/vector/ArrowFormatWriterTest.java index 63b71554744f..0e117a77829d 100644 --- a/paimon-arrow/src/test/java/org/apache/paimon/arrow/vector/ArrowFormatWriterTest.java +++ b/paimon-arrow/src/test/java/org/apache/paimon/arrow/vector/ArrowFormatWriterTest.java @@ -20,8 +20,10 @@ import org.apache.paimon.arrow.ArrowBundleRecords; import org.apache.paimon.arrow.ArrowFieldTypeConversion; +import org.apache.paimon.arrow.ArrowUtils; import org.apache.paimon.arrow.converter.Arrow2PaimonVectorConverter; import org.apache.paimon.arrow.reader.ArrowBatchReader; +import org.apache.paimon.arrow.writer.ArrowFieldWriter; import org.apache.paimon.arrow.writer.ArrowFieldWriterFactoryVisitor; import org.apache.paimon.data.BinaryString; import org.apache.paimon.data.Decimal; @@ -30,6 +32,7 @@ import org.apache.paimon.data.GenericRow; import org.apache.paimon.data.InternalRow; import org.apache.paimon.data.Timestamp; +import org.apache.paimon.data.columnar.IntColumnVector; import org.apache.paimon.types.DataField; import org.apache.paimon.types.DataTypes; import org.apache.paimon.types.RowType; @@ -40,6 +43,7 @@ import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.TimeMilliVector; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.complex.ListVector; @@ -52,6 +56,7 @@ import java.math.BigDecimal; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -433,6 +438,55 @@ public void testWriteRowArrayTwice() { } } + @Test + public void testTimeFieldWriterWithOffset() { + RowType rowType = + new RowType( + Collections.singletonList( + new DataField(0, "time_field", DataTypes.TIME()))); + try (RootAllocator allocator = new RootAllocator(); + VectorSchemaRoot vsr = ArrowUtils.createVectorSchemaRoot(rowType, allocator)) { + ArrowFieldWriter[] fieldWriters = ArrowUtils.createArrowFieldWriters(vsr, rowType); + + IntColumnVector timeVec = + new IntColumnVector() { + final int[] values = new int[] {0, 1000, 2000, 3000, 4000}; + + @Override + public int getInt(int i) { + return values[i]; + } + + @Override + public boolean isNullAt(int i) { + return false; + } + }; + + TimeMilliVector timeMilliVector = (TimeMilliVector) vsr.getVector("time_field"); + final int batchRows = 3; + int startIndex = 0; + fieldWriters[0].write(timeVec, null, startIndex, batchRows); + vsr.setRowCount(batchRows); + + for (int i = 0; i < batchRows; i++) { + int arrowValue = timeMilliVector.get(i); + int paimonValue = timeVec.getInt(i + startIndex); + assertThat(arrowValue).isEqualTo(paimonValue); + } + + timeMilliVector.clear(); + startIndex = 2; + fieldWriters[0].write(timeVec, null, startIndex, batchRows); + vsr.setRowCount(batchRows); + for (int i = 0; i < batchRows; i++) { + int arrowValue = timeMilliVector.get(i); + int paimonValue = timeVec.getInt(i + startIndex); + assertThat(arrowValue).isEqualTo(paimonValue); + } + } + } + private void writeAndCheckRowArray(ArrowFormatWriter arrowFormatWriter) { GenericRow genericRow = new GenericRow(1); GenericRow innerRow = new GenericRow(1); From 8e37c63b18da1b43057b4bdb7ad2a0dbbc6a61a5 Mon Sep 17 00:00:00 2001 From: Peng Lu Date: Tue, 9 Dec 2025 18:48:30 +0800 Subject: [PATCH 2/2] Updated the test code to improve readability --- .../arrow/vector/ArrowFormatWriterTest.java | 30 ++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/paimon-arrow/src/test/java/org/apache/paimon/arrow/vector/ArrowFormatWriterTest.java b/paimon-arrow/src/test/java/org/apache/paimon/arrow/vector/ArrowFormatWriterTest.java index 0e117a77829d..a70385941a75 100644 --- a/paimon-arrow/src/test/java/org/apache/paimon/arrow/vector/ArrowFormatWriterTest.java +++ b/paimon-arrow/src/test/java/org/apache/paimon/arrow/vector/ArrowFormatWriterTest.java @@ -440,15 +440,15 @@ public void testWriteRowArrayTwice() { @Test public void testTimeFieldWriterWithOffset() { + String fieldName = "time_field"; RowType rowType = new RowType( - Collections.singletonList( - new DataField(0, "time_field", DataTypes.TIME()))); + Collections.singletonList(new DataField(0, fieldName, DataTypes.TIME()))); try (RootAllocator allocator = new RootAllocator(); VectorSchemaRoot vsr = ArrowUtils.createVectorSchemaRoot(rowType, allocator)) { ArrowFieldWriter[] fieldWriters = ArrowUtils.createArrowFieldWriters(vsr, rowType); - IntColumnVector timeVec = + IntColumnVector paimonTimeVector = new IntColumnVector() { final int[] values = new int[] {0, 1000, 2000, 3000, 4000}; @@ -462,26 +462,28 @@ public boolean isNullAt(int i) { return false; } }; + TimeMilliVector arrowTimeVector = (TimeMilliVector) vsr.getVector(fieldName); - TimeMilliVector timeMilliVector = (TimeMilliVector) vsr.getVector("time_field"); final int batchRows = 3; int startIndex = 0; - fieldWriters[0].write(timeVec, null, startIndex, batchRows); + int sourceIndex = startIndex; + fieldWriters[0].write(paimonTimeVector, null, startIndex, batchRows); vsr.setRowCount(batchRows); - - for (int i = 0; i < batchRows; i++) { - int arrowValue = timeMilliVector.get(i); - int paimonValue = timeVec.getInt(i + startIndex); + for (int targetIndex = 0; targetIndex < batchRows; targetIndex++) { + int arrowValue = arrowTimeVector.get(targetIndex); + int paimonValue = paimonTimeVector.getInt(sourceIndex++); assertThat(arrowValue).isEqualTo(paimonValue); } - timeMilliVector.clear(); + arrowTimeVector.clear(); + startIndex = 2; - fieldWriters[0].write(timeVec, null, startIndex, batchRows); + sourceIndex = startIndex; + fieldWriters[0].write(paimonTimeVector, null, startIndex, batchRows); vsr.setRowCount(batchRows); - for (int i = 0; i < batchRows; i++) { - int arrowValue = timeMilliVector.get(i); - int paimonValue = timeVec.getInt(i + startIndex); + for (int targetIndex = 0; targetIndex < batchRows; targetIndex++) { + int arrowValue = arrowTimeVector.get(targetIndex); + int paimonValue = paimonTimeVector.getInt(sourceIndex); assertThat(arrowValue).isEqualTo(paimonValue); } }