From 5740485210db4d5de6879781198811dde98ac1b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BB=9F=E5=BC=8B?= Date: Wed, 31 Dec 2025 16:18:00 +0800 Subject: [PATCH 1/5] [pypaimon] Creation_time should be utc timestamp --- paimon-python/pypaimon/data/timestamp.py | 15 ++++---- .../manifest/manifest_file_manager.py | 2 +- .../manifest/schema/data_file_meta.py | 2 +- .../pypaimon/tests/file_store_commit_test.py | 36 +++++++++++++++---- 4 files changed, 39 insertions(+), 16 deletions(-) diff --git a/paimon-python/pypaimon/data/timestamp.py b/paimon-python/pypaimon/data/timestamp.py index ace4c1e1c61f..4e6b13d2b2b6 100644 --- a/paimon-python/pypaimon/data/timestamp.py +++ b/paimon-python/pypaimon/data/timestamp.py @@ -16,7 +16,7 @@ # limitations under the License. ################################################################################ -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone class Timestamp: @@ -117,8 +117,9 @@ def __str__(self): @staticmethod def now() -> 'Timestamp': - """Creates an instance of Timestamp for now.""" - return Timestamp.from_local_date_time(datetime.now()) + """Creates an instance of Timestamp for now (utc).""" + dt = datetime.now(timezone.utc) + return Timestamp.from_date_time(dt) @staticmethod def from_epoch_millis(milliseconds: int, nanos_of_millisecond: int = 0) -> 'Timestamp': @@ -131,15 +132,13 @@ def from_epoch_millis(milliseconds: int, nanos_of_millisecond: int = 0) -> 'Time return Timestamp(milliseconds, nanos_of_millisecond) @staticmethod - def from_local_date_time(date_time: datetime) -> 'Timestamp': + def from_date_time(date_time: datetime) -> 'Timestamp': """ - Creates an instance of Timestamp from a datetime (timezone-free). + Creates an instance of Timestamp from a datetime Args: - date_time: a datetime object (should be naive, without timezone) + date_time: a datetime object """ - if date_time.tzinfo is not None: - raise ValueError("datetime must be naive (no timezone)") epoch_date = datetime(1970, 1, 1).date() date_time_date = date_time.date() diff --git a/paimon-python/pypaimon/manifest/manifest_file_manager.py b/paimon-python/pypaimon/manifest/manifest_file_manager.py index f6ae41e3d386..67bb235d5566 100644 --- a/paimon-python/pypaimon/manifest/manifest_file_manager.py +++ b/paimon-python/pypaimon/manifest/manifest_file_manager.py @@ -117,7 +117,7 @@ def read(self, manifest_file_name: str, manifest_entry_filter=None, drop_stats=T epoch_millis = int(creation_time_value.timestamp() * 1000) creation_time_ts = Timestamp.from_epoch_millis(epoch_millis) else: - creation_time_ts = Timestamp.from_local_date_time(creation_time_value) + creation_time_ts = Timestamp.from_date_time(creation_time_value) elif isinstance(creation_time_value, (int, float)): creation_time_ts = Timestamp.from_epoch_millis(int(creation_time_value)) else: diff --git a/paimon-python/pypaimon/manifest/schema/data_file_meta.py b/paimon-python/pypaimon/manifest/schema/data_file_meta.py index 239c63dbba38..f94a39c8f126 100644 --- a/paimon-python/pypaimon/manifest/schema/data_file_meta.py +++ b/paimon-python/pypaimon/manifest/schema/data_file_meta.py @@ -17,7 +17,7 @@ ################################################################################ from dataclasses import dataclass -from datetime import datetime +from datetime import datetime, timezone from typing import List, Optional import time diff --git a/paimon-python/pypaimon/tests/file_store_commit_test.py b/paimon-python/pypaimon/tests/file_store_commit_test.py index 438ff6aeb8eb..3b54c038aa0b 100644 --- a/paimon-python/pypaimon/tests/file_store_commit_test.py +++ b/paimon-python/pypaimon/tests/file_store_commit_test.py @@ -65,7 +65,7 @@ def test_generate_partition_statistics_single_partition_single_file( from pypaimon.data.timestamp import Timestamp from pypaimon.table.row.generic_row import GenericRow from pypaimon.manifest.schema.simple_stats import SimpleStats - creation_time = Timestamp.from_local_date_time(creation_time_dt) + creation_time = Timestamp.from_date_time(creation_time_dt) file_meta = DataFileMeta.create( file_name="test_file_1.parquet", file_size=1024 * 1024, # 1MB @@ -115,8 +115,8 @@ def test_generate_partition_statistics_multiple_files_same_partition( from pypaimon.data.timestamp import Timestamp from pypaimon.table.row.generic_row import GenericRow from pypaimon.manifest.schema.simple_stats import SimpleStats - creation_time_1 = Timestamp.from_local_date_time(datetime(2024, 1, 15, 10, 30, 0)) - creation_time_2 = Timestamp.from_local_date_time(datetime(2024, 1, 15, 11, 30, 0)) # Later time + creation_time_1 = Timestamp.from_date_time(datetime(2024, 1, 15, 10, 30, 0)) + creation_time_2 = Timestamp.from_date_time(datetime(2024, 1, 15, 11, 30, 0)) # Later time file_meta_1 = DataFileMeta.create( file_name="test_file_1.parquet", @@ -180,7 +180,7 @@ def test_generate_partition_statistics_multiple_partitions( from pypaimon.data.timestamp import Timestamp from pypaimon.table.row.generic_row import GenericRow from pypaimon.manifest.schema.simple_stats import SimpleStats - creation_time = Timestamp.from_local_date_time(creation_time_dt) + creation_time = Timestamp.from_date_time(creation_time_dt) # File for partition 1 file_meta_1 = DataFileMeta.create( @@ -271,7 +271,7 @@ def test_generate_partition_statistics_unpartitioned_table( from pypaimon.data.timestamp import Timestamp from pypaimon.table.row.generic_row import GenericRow from pypaimon.manifest.schema.simple_stats import SimpleStats - creation_time = Timestamp.from_local_date_time(creation_time_dt) + creation_time = Timestamp.from_date_time(creation_time_dt) file_meta = DataFileMeta.create( file_name="test_file_1.parquet", file_size=1024 * 1024, @@ -346,6 +346,30 @@ def test_generate_partition_statistics_no_creation_time( # Should have a valid timestamp (current time) self.assertGreater(stat.last_file_creation_time, 0) + def test_creation_time( + self, mock_manifest_list_manager, mock_manifest_file_manager, mock_snapshot_manager): + from pypaimon.data.timestamp import Timestamp + from pypaimon.table.row.generic_row import GenericRow + from pypaimon.manifest.schema.simple_stats import SimpleStats + file_meta = DataFileMeta.create( + file_name="test_file_1.parquet", + file_size=1024 * 1024, + row_count=10000, + min_key=GenericRow([], []), + max_key=GenericRow([], []), + key_stats=SimpleStats.empty_stats(), + value_stats=SimpleStats.empty_stats(), + min_sequence_number=1, + max_sequence_number=100, + schema_id=0, + level=0, + extra_files=[], + creation_time=None + ) + creation_time = file_meta.creation_time + now = Timestamp.from_date_time(datetime.now()) + self.assertEqual(round((now.get_millisecond() - creation_time.get_millisecond()) / 60 / 60 / 1000), 8) + def test_generate_partition_statistics_mismatched_partition_keys( self, mock_manifest_list_manager, mock_manifest_file_manager, mock_snapshot_manager): """Test partition statistics generation when partition tuple doesn't match partition keys.""" @@ -369,7 +393,7 @@ def test_generate_partition_statistics_mismatched_partition_keys( schema_id=0, level=0, extra_files=[], - creation_time=Timestamp.from_local_date_time(datetime(2024, 1, 15, 10, 30, 0)) + creation_time=Timestamp.from_date_time(datetime(2024, 1, 15, 10, 30, 0)) ) commit_message = CommitMessage( From bfc8ffd9be66296621e935125dfe7d67f2536fc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BB=9F=E5=BC=8B?= Date: Wed, 31 Dec 2025 16:28:00 +0800 Subject: [PATCH 2/5] Fix minus --- paimon-python/pypaimon/manifest/schema/data_file_meta.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paimon-python/pypaimon/manifest/schema/data_file_meta.py b/paimon-python/pypaimon/manifest/schema/data_file_meta.py index f94a39c8f126..239c63dbba38 100644 --- a/paimon-python/pypaimon/manifest/schema/data_file_meta.py +++ b/paimon-python/pypaimon/manifest/schema/data_file_meta.py @@ -17,7 +17,7 @@ ################################################################################ from dataclasses import dataclass -from datetime import datetime, timezone +from datetime import datetime from typing import List, Optional import time From 84ffcaf5190cf6a2f01529af875fb516e5038ca0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BB=9F=E5=BC=8B?= Date: Wed, 31 Dec 2025 16:47:02 +0800 Subject: [PATCH 3/5] Fix minus --- paimon-python/pypaimon/tests/file_store_commit_test.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/paimon-python/pypaimon/tests/file_store_commit_test.py b/paimon-python/pypaimon/tests/file_store_commit_test.py index 3b54c038aa0b..1ff83fa53311 100644 --- a/paimon-python/pypaimon/tests/file_store_commit_test.py +++ b/paimon-python/pypaimon/tests/file_store_commit_test.py @@ -367,8 +367,7 @@ def test_creation_time( creation_time=None ) creation_time = file_meta.creation_time - now = Timestamp.from_date_time(datetime.now()) - self.assertEqual(round((now.get_millisecond() - creation_time.get_millisecond()) / 60 / 60 / 1000), 8) + self.assertEqual(creation_time, Timestamp.now()) def test_generate_partition_statistics_mismatched_partition_keys( self, mock_manifest_list_manager, mock_manifest_file_manager, mock_snapshot_manager): From f46266d4b07fe0abd6f98392d54755ed0f2dc7c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BB=9F=E5=BC=8B?= Date: Wed, 31 Dec 2025 16:50:22 +0800 Subject: [PATCH 4/5] Fix minus --- paimon-python/pypaimon/tests/file_store_commit_test.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/paimon-python/pypaimon/tests/file_store_commit_test.py b/paimon-python/pypaimon/tests/file_store_commit_test.py index 1ff83fa53311..e04af507321c 100644 --- a/paimon-python/pypaimon/tests/file_store_commit_test.py +++ b/paimon-python/pypaimon/tests/file_store_commit_test.py @@ -18,8 +18,11 @@ import unittest from datetime import datetime +from zoneinfo import ZoneInfo from unittest.mock import Mock, patch +import dateutil + from pypaimon.manifest.schema.data_file_meta import DataFileMeta from pypaimon.manifest.schema.manifest_entry import ManifestEntry from pypaimon.snapshot.snapshot_commit import PartitionStatistics @@ -367,7 +370,8 @@ def test_creation_time( creation_time=None ) creation_time = file_meta.creation_time - self.assertEqual(creation_time, Timestamp.now()) + now = Timestamp.from_date_time(datetime.now(ZoneInfo("Asia/Shanghai"))) + self.assertEqual(round((now.get_millisecond() - creation_time.get_millisecond()) / 60 / 60 / 1000), 8) def test_generate_partition_statistics_mismatched_partition_keys( self, mock_manifest_list_manager, mock_manifest_file_manager, mock_snapshot_manager): From 8dda9d71f2b7f2116516d7941c2a87301a726624 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BB=9F=E5=BC=8B?= Date: Wed, 31 Dec 2025 17:02:21 +0800 Subject: [PATCH 5/5] Fix minus --- paimon-python/pypaimon/tests/file_store_commit_test.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/paimon-python/pypaimon/tests/file_store_commit_test.py b/paimon-python/pypaimon/tests/file_store_commit_test.py index e04af507321c..82f053e450a8 100644 --- a/paimon-python/pypaimon/tests/file_store_commit_test.py +++ b/paimon-python/pypaimon/tests/file_store_commit_test.py @@ -21,8 +21,6 @@ from zoneinfo import ZoneInfo from unittest.mock import Mock, patch -import dateutil - from pypaimon.manifest.schema.data_file_meta import DataFileMeta from pypaimon.manifest.schema.manifest_entry import ManifestEntry from pypaimon.snapshot.snapshot_commit import PartitionStatistics