From 6ba59a0a6ac655d48710b6364fac6c1513144d11 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Sun, 11 May 2025 12:14:32 -0700 Subject: [PATCH 01/26] add GeotagImageSamplesFromVideo --- mapillary_tools/geotag/factory.py | 25 ++----------- .../geotag/geotag_images_from_video.py | 35 +++++++++++++++++++ 2 files changed, 38 insertions(+), 22 deletions(-) diff --git a/mapillary_tools/geotag/factory.py b/mapillary_tools/geotag/factory.py index def91c7a7..ede595a99 100644 --- a/mapillary_tools/geotag/factory.py +++ b/mapillary_tools/geotag/factory.py @@ -6,7 +6,6 @@ from pathlib import Path from .. import exceptions, types, utils -from ..types import FileType from . import ( base, geotag_images_from_exif, @@ -205,27 +204,9 @@ def _geotag_images( ) return geotag.to_description(image_paths) - elif option.source in [ - SourceType.GOPRO, - SourceType.BLACKVUE, - SourceType.CAMM, - ]: - map_geotag_source_to_filetype: dict[SourceType, FileType] = { - SourceType.GOPRO: FileType.GOPRO, - SourceType.BLACKVUE: FileType.BLACKVUE, - SourceType.CAMM: FileType.CAMM, - } - video_paths = utils.find_videos([_ensure_source_path(option)]) - image_samples_by_video_path = utils.find_all_image_samples( - image_paths, video_paths - ) - video_paths_with_image_samples = list(image_samples_by_video_path.keys()) - video_metadatas = geotag_videos_from_video.GeotagVideosFromVideo( - filetypes={map_geotag_source_to_filetype[option.source]}, - num_processes=option.num_processes, - ).to_description(video_paths_with_image_samples) - geotag = geotag_images_from_video.GeotagImagesFromVideo( - video_metadatas, + elif option.source in [SourceType.GOPRO, SourceType.BLACKVUE, SourceType.CAMM]: + geotag = geotag_images_from_video.GeotagImageSamplesFromVideo( + _ensure_source_path(option), offset_time=interpolation.offset_time, num_processes=option.num_processes, ) diff --git a/mapillary_tools/geotag/geotag_images_from_video.py b/mapillary_tools/geotag/geotag_images_from_video.py index 6d032b21b..91468d955 100644 --- a/mapillary_tools/geotag/geotag_images_from_video.py +++ b/mapillary_tools/geotag/geotag_images_from_video.py @@ -13,6 +13,7 @@ from .. import types, utils from .base import GeotagImagesFromGeneric from .geotag_images_from_gpx import GeotagImagesFromGPX +from .geotag_videos_from_video import GeotagVideosFromVideo LOG = logging.getLogger(__name__) @@ -90,3 +91,37 @@ def to_description( assert len(final_image_metadatas) <= len(image_paths) return final_image_metadatas + + +class GeotagImageSamplesFromVideo(GeotagImagesFromGeneric): + def __init__( + self, + source_path: Path, + filetypes: set[types.FileType] | None = None, + offset_time: float = 0.0, + num_processes: int | None = None, + ): + super().__init__(num_processes=num_processes) + self.source_path = source_path + self.filetypes = filetypes + self.offset_time = offset_time + + @override + def to_description( + self, image_paths: T.Sequence[Path] + ) -> list[types.ImageMetadataOrError]: + video_paths = utils.find_videos([self.source_path]) + image_samples_by_video_path = utils.find_all_image_samples( + image_paths, video_paths + ) + video_paths_with_image_samples = list(image_samples_by_video_path.keys()) + video_metadatas = GeotagVideosFromVideo( + filetypes=self.filetypes, + num_processes=self.num_processes, + ).to_description(video_paths_with_image_samples) + geotag = GeotagImagesFromVideo( + video_metadatas, + offset_time=self.offset_time, + num_processes=self.num_processes, + ) + return geotag.to_description(image_paths) From 83d481a344536f6d1670fc0f77cd16b3767f5925 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Sun, 11 May 2025 13:28:30 -0700 Subject: [PATCH 02/26] rename --- mapillary_tools/geotag/factory.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/mapillary_tools/geotag/factory.py b/mapillary_tools/geotag/factory.py index ede595a99..ff6eb9760 100644 --- a/mapillary_tools/geotag/factory.py +++ b/mapillary_tools/geotag/factory.py @@ -69,8 +69,8 @@ def process( for idx, option in enumerate(options): LOG.debug("Processing %d files with %s", len(reprocessable_paths), option) - image_metadata_or_errors = _geotag_images(reprocessable_paths, option) - video_metadata_or_errors = _geotag_videos(reprocessable_paths, option) + image_metadata_or_errors = _build_image_geotag(reprocessable_paths, option) + video_metadata_or_errors = _build_video_geotag(reprocessable_paths, option) more_option = idx < len(options) - 1 @@ -139,7 +139,7 @@ def _ensure_source_path(option: SourceOption) -> Path: return option.source_path.source_path -def _geotag_images( +def _build_image_geotag( paths: T.Iterable[Path], option: SourceOption ) -> list[types.ImageMetadataOrError]: image_paths, _ = _filter_images_and_videos(paths, option.filetypes) @@ -216,7 +216,7 @@ def _geotag_images( raise ValueError(f"Invalid geotag source {option.source}") -def _geotag_videos( +def _build_video_geotag( paths: T.Iterable[Path], option: SourceOption ) -> list[types.VideoMetadataOrError]: _, video_paths = _filter_images_and_videos(paths, option.filetypes) @@ -260,11 +260,7 @@ def _geotag_videos( # Legacy image-specific geotag types return [] - elif option.source in [ - SourceType.GOPRO, - SourceType.BLACKVUE, - SourceType.CAMM, - ]: + elif option.source in [SourceType.GOPRO, SourceType.BLACKVUE, SourceType.CAMM]: # Legacy image-specific geotag types return [] From 8adc00cc5e2198222a100dab5df5f450ae0cccf5 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Sun, 11 May 2025 13:45:07 -0700 Subject: [PATCH 03/26] build geotags first --- mapillary_tools/geotag/factory.py | 76 ++++++++++++++----------------- 1 file changed, 35 insertions(+), 41 deletions(-) diff --git a/mapillary_tools/geotag/factory.py b/mapillary_tools/geotag/factory.py index ff6eb9760..db7e930d5 100644 --- a/mapillary_tools/geotag/factory.py +++ b/mapillary_tools/geotag/factory.py @@ -63,14 +63,30 @@ def process( final_metadatas: list[types.MetadataOrError] = [] + video_geotags = [_build_video_geotag(option) for option in options] + image_geotags = [_build_image_geotag(option) for option in options] + # Paths (image path or video path) that will be sent to the next geotag process reprocessable_paths = set(paths) - for idx, option in enumerate(options): + for idx, (option, video_geotag, image_geotag) in enumerate( + zip(options, video_geotags, image_geotags) + ): LOG.debug("Processing %d files with %s", len(reprocessable_paths), option) - image_metadata_or_errors = _build_image_geotag(reprocessable_paths, option) - video_metadata_or_errors = _build_video_geotag(reprocessable_paths, option) + image_videos, video_paths = _filter_images_and_videos( + reprocessable_paths, option.filetypes + ) + + if image_videos and image_geotag is not None: + image_metadata_or_errors = image_geotag.to_description(image_videos) + else: + image_metadata_or_errors = [] + + if video_paths and video_geotag is not None: + video_metadata_or_errors = video_geotag.to_description(video_paths) + else: + video_metadata_or_errors = [] more_option = idx < len(options) - 1 @@ -139,14 +155,7 @@ def _ensure_source_path(option: SourceOption) -> Path: return option.source_path.source_path -def _build_image_geotag( - paths: T.Iterable[Path], option: SourceOption -) -> list[types.ImageMetadataOrError]: - image_paths, _ = _filter_images_and_videos(paths, option.filetypes) - - if not image_paths: - return [] - +def _build_image_geotag(option: SourceOption) -> base.GeotagImagesFromGeneric | None: if option.interpolation is None: interpolation = InterpolationOption() else: @@ -158,17 +167,13 @@ def _build_image_geotag( geotag = geotag_images_from_exif.GeotagImagesFromEXIF( num_processes=option.num_processes ) - return geotag.to_description(image_paths) + return geotag if option.source is SourceType.EXIFTOOL_RUNTIME: geotag = geotag_images_from_exiftool.GeotagImagesFromExifToolRunner( num_processes=option.num_processes ) - try: - return geotag.to_description(image_paths) - except exceptions.MapillaryExiftoolNotFoundError as ex: - LOG.warning('Skip "%s" because: %s', option.source.value, ex) - return [] + return geotag elif option.source is SourceType.EXIFTOOL_XML: # This is to ensure 'video_process --geotag={"source": "exiftool_xml", "source_path": "/tmp/xml_path"}' @@ -177,7 +182,7 @@ def _build_image_geotag( xml_path=_ensure_source_path(option), num_processes=option.num_processes, ) - return geotag.to_description(image_paths) + return geotag elif option.source is SourceType.GPX: geotag = geotag_images_from_gpx_file.GeotagImagesFromGPXFile( @@ -186,7 +191,7 @@ def _build_image_geotag( offset_time=interpolation.offset_time, num_processes=option.num_processes, ) - return geotag.to_description(image_paths) + return geotag elif option.source is SourceType.NMEA: geotag = geotag_images_from_nmea_file.GeotagImagesFromNMEAFile( @@ -196,13 +201,13 @@ def _build_image_geotag( num_processes=option.num_processes, ) - return geotag.to_description(image_paths) + return geotag elif option.source is SourceType.EXIF: geotag = geotag_images_from_exif.GeotagImagesFromEXIF( num_processes=option.num_processes ) - return geotag.to_description(image_paths) + return geotag elif option.source in [SourceType.GOPRO, SourceType.BLACKVUE, SourceType.CAMM]: geotag = geotag_images_from_video.GeotagImageSamplesFromVideo( @@ -210,59 +215,48 @@ def _build_image_geotag( offset_time=interpolation.offset_time, num_processes=option.num_processes, ) - return geotag.to_description(image_paths) + return geotag else: raise ValueError(f"Invalid geotag source {option.source}") -def _build_video_geotag( - paths: T.Iterable[Path], option: SourceOption -) -> list[types.VideoMetadataOrError]: - _, video_paths = _filter_images_and_videos(paths, option.filetypes) - - if not video_paths: - return [] - +def _build_video_geotag(option: SourceOption) -> base.GeotagVideosFromGeneric | None: geotag: base.GeotagVideosFromGeneric if option.source is SourceType.NATIVE: geotag = geotag_videos_from_video.GeotagVideosFromVideo( num_processes=option.num_processes, filetypes=option.filetypes ) - return geotag.to_description(video_paths) + return geotag if option.source is SourceType.EXIFTOOL_RUNTIME: geotag = geotag_videos_from_exiftool.GeotagVideosFromExifToolRunner( num_processes=option.num_processes ) - try: - return geotag.to_description(video_paths) - except exceptions.MapillaryExiftoolNotFoundError as ex: - LOG.warning('Skip "%s" because: %s', option.source.value, ex) - return [] + return geotag elif option.source is SourceType.EXIFTOOL_XML: geotag = geotag_videos_from_exiftool.GeotagVideosFromExifToolXML( xml_path=_ensure_source_path(option), ) - return geotag.to_description(video_paths) + return geotag elif option.source is SourceType.GPX: geotag = geotag_videos_from_gpx.GeotagVideosFromGPX() - return geotag.to_description(video_paths) + return geotag elif option.source is SourceType.NMEA: # TODO: geotag videos from NMEA - return [] + return None elif option.source is SourceType.EXIF: # Legacy image-specific geotag types - return [] + return None elif option.source in [SourceType.GOPRO, SourceType.BLACKVUE, SourceType.CAMM]: # Legacy image-specific geotag types - return [] + return None else: raise ValueError(f"Invalid geotag source {option.source}") From edfe6e5c49acc8d4a8200081f988d3411fc33fc6 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Sun, 11 May 2025 14:21:01 -0700 Subject: [PATCH 04/26] fix json schema --- mapillary_tools/geotag/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mapillary_tools/geotag/options.py b/mapillary_tools/geotag/options.py index c3c243e61..b9a1e5b78 100644 --- a/mapillary_tools/geotag/options.py +++ b/mapillary_tools/geotag/options.py @@ -140,7 +140,7 @@ class InterpolationOption: "type": "integer", }, "interpolation_offset_time": { - "type": "float", + "type": "number", }, "interpolation_use_gpx_start_time": { "type": "boolean", From 5d79e4730a913a509864553221314fd97f4e5115 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Sun, 11 May 2025 20:55:51 -0700 Subject: [PATCH 05/26] refactor --- mapillary_tools/geotag/factory.py | 59 ++++++++++++------------------- 1 file changed, 22 insertions(+), 37 deletions(-) diff --git a/mapillary_tools/geotag/factory.py b/mapillary_tools/geotag/factory.py index db7e930d5..ec2be8b63 100644 --- a/mapillary_tools/geotag/factory.py +++ b/mapillary_tools/geotag/factory.py @@ -63,28 +63,29 @@ def process( final_metadatas: list[types.MetadataOrError] = [] - video_geotags = [_build_video_geotag(option) for option in options] - image_geotags = [_build_image_geotag(option) for option in options] - # Paths (image path or video path) that will be sent to the next geotag process reprocessable_paths = set(paths) - for idx, (option, video_geotag, image_geotag) in enumerate( - zip(options, video_geotags, image_geotags) - ): + for idx, option in enumerate(options): LOG.debug("Processing %d files with %s", len(reprocessable_paths), option) image_videos, video_paths = _filter_images_and_videos( reprocessable_paths, option.filetypes ) - if image_videos and image_geotag is not None: - image_metadata_or_errors = image_geotag.to_description(image_videos) + if image_videos: + image_geotag = _build_image_geotag(option) + image_metadata_or_errors = ( + image_geotag.to_description(image_videos) if image_geotag else [] + ) else: image_metadata_or_errors = [] - if video_paths and video_geotag is not None: - video_metadata_or_errors = video_geotag.to_description(video_paths) + if video_paths: + video_geotag = _build_video_geotag(option) + video_metadata_or_errors = ( + video_geotag.to_description(video_paths) if video_geotag else [] + ) else: video_metadata_or_errors = [] @@ -161,90 +162,74 @@ def _build_image_geotag(option: SourceOption) -> base.GeotagImagesFromGeneric | else: interpolation = option.interpolation - geotag: base.GeotagImagesFromGeneric - if option.source is SourceType.NATIVE: - geotag = geotag_images_from_exif.GeotagImagesFromEXIF( + return geotag_images_from_exif.GeotagImagesFromEXIF( num_processes=option.num_processes ) - return geotag if option.source is SourceType.EXIFTOOL_RUNTIME: - geotag = geotag_images_from_exiftool.GeotagImagesFromExifToolRunner( + return geotag_images_from_exiftool.GeotagImagesFromExifToolRunner( num_processes=option.num_processes ) - return geotag elif option.source is SourceType.EXIFTOOL_XML: # This is to ensure 'video_process --geotag={"source": "exiftool_xml", "source_path": "/tmp/xml_path"}' # to work - geotag = geotag_images_from_exiftool.GeotagImagesFromExifToolWithSamples( + return geotag_images_from_exiftool.GeotagImagesFromExifToolWithSamples( xml_path=_ensure_source_path(option), num_processes=option.num_processes, ) - return geotag elif option.source is SourceType.GPX: - geotag = geotag_images_from_gpx_file.GeotagImagesFromGPXFile( + return geotag_images_from_gpx_file.GeotagImagesFromGPXFile( source_path=_ensure_source_path(option), use_gpx_start_time=interpolation.use_gpx_start_time, offset_time=interpolation.offset_time, num_processes=option.num_processes, ) - return geotag elif option.source is SourceType.NMEA: - geotag = geotag_images_from_nmea_file.GeotagImagesFromNMEAFile( + return geotag_images_from_nmea_file.GeotagImagesFromNMEAFile( source_path=_ensure_source_path(option), use_gpx_start_time=interpolation.use_gpx_start_time, offset_time=interpolation.offset_time, num_processes=option.num_processes, ) - return geotag - elif option.source is SourceType.EXIF: - geotag = geotag_images_from_exif.GeotagImagesFromEXIF( + return geotag_images_from_exif.GeotagImagesFromEXIF( num_processes=option.num_processes ) - return geotag elif option.source in [SourceType.GOPRO, SourceType.BLACKVUE, SourceType.CAMM]: - geotag = geotag_images_from_video.GeotagImageSamplesFromVideo( + return geotag_images_from_video.GeotagImageSamplesFromVideo( _ensure_source_path(option), offset_time=interpolation.offset_time, num_processes=option.num_processes, ) - return geotag else: raise ValueError(f"Invalid geotag source {option.source}") def _build_video_geotag(option: SourceOption) -> base.GeotagVideosFromGeneric | None: - geotag: base.GeotagVideosFromGeneric - if option.source is SourceType.NATIVE: - geotag = geotag_videos_from_video.GeotagVideosFromVideo( + return geotag_videos_from_video.GeotagVideosFromVideo( num_processes=option.num_processes, filetypes=option.filetypes ) - return geotag if option.source is SourceType.EXIFTOOL_RUNTIME: - geotag = geotag_videos_from_exiftool.GeotagVideosFromExifToolRunner( + return geotag_videos_from_exiftool.GeotagVideosFromExifToolRunner( num_processes=option.num_processes ) - return geotag elif option.source is SourceType.EXIFTOOL_XML: - geotag = geotag_videos_from_exiftool.GeotagVideosFromExifToolXML( + return geotag_videos_from_exiftool.GeotagVideosFromExifToolXML( xml_path=_ensure_source_path(option), ) - return geotag elif option.source is SourceType.GPX: - geotag = geotag_videos_from_gpx.GeotagVideosFromGPX() - return geotag + return geotag_videos_from_gpx.GeotagVideosFromGPX() elif option.source is SourceType.NMEA: # TODO: geotag videos from NMEA From c1489ed5c7f64cb1ace2d21f96f32db45767f668 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Wed, 25 Jun 2025 23:41:55 -0700 Subject: [PATCH 06/26] fix --- mapillary_tools/geotag/factory.py | 11 ++++------- mapillary_tools/geotag/geotag_videos_from_gpx.py | 13 ++++++++----- mapillary_tools/geotag/options.py | 8 ++++++-- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/mapillary_tools/geotag/factory.py b/mapillary_tools/geotag/factory.py index ec2be8b63..297efb25d 100644 --- a/mapillary_tools/geotag/factory.py +++ b/mapillary_tools/geotag/factory.py @@ -162,7 +162,7 @@ def _build_image_geotag(option: SourceOption) -> base.GeotagImagesFromGeneric | else: interpolation = option.interpolation - if option.source is SourceType.NATIVE: + if option.source in [SourceType.EXIF, SourceType.NATIVE]: return geotag_images_from_exif.GeotagImagesFromEXIF( num_processes=option.num_processes ) @@ -196,11 +196,6 @@ def _build_image_geotag(option: SourceOption) -> base.GeotagImagesFromGeneric | num_processes=option.num_processes, ) - elif option.source is SourceType.EXIF: - return geotag_images_from_exif.GeotagImagesFromEXIF( - num_processes=option.num_processes - ) - elif option.source in [SourceType.GOPRO, SourceType.BLACKVUE, SourceType.CAMM]: return geotag_images_from_video.GeotagImageSamplesFromVideo( _ensure_source_path(option), @@ -229,7 +224,9 @@ def _build_video_geotag(option: SourceOption) -> base.GeotagVideosFromGeneric | ) elif option.source is SourceType.GPX: - return geotag_videos_from_gpx.GeotagVideosFromGPX() + return geotag_videos_from_gpx.GeotagVideosFromGPX( + option=option.source_path, num_processes=option.num_processes + ) elif option.source is SourceType.NMEA: # TODO: geotag videos from NMEA diff --git a/mapillary_tools/geotag/geotag_videos_from_gpx.py b/mapillary_tools/geotag/geotag_videos_from_gpx.py index a5e8afd85..e2ceaed9c 100644 --- a/mapillary_tools/geotag/geotag_videos_from_gpx.py +++ b/mapillary_tools/geotag/geotag_videos_from_gpx.py @@ -26,14 +26,17 @@ def __init__( ): super().__init__(num_processes=num_processes) if option is None: - option = options.SourcePathOption(pattern="%f.gpx") + option = options.SourcePathOption(pattern="%g.gpx") self.option = option @override def _generate_video_extractors( self, video_paths: T.Sequence[Path] ) -> T.Sequence[GPXVideoExtractor]: - return [ - GPXVideoExtractor(video_path, self.option.resolve(video_path)) - for video_path in video_paths - ] + extractors = [] + for video_path in video_paths: + resolved_path = self.option.resolve(video_path) + if not resolved_path.is_file(): + pass + extractors.append(GPXVideoExtractor(video_path, resolved_path)) + return extractors diff --git a/mapillary_tools/geotag/options.py b/mapillary_tools/geotag/options.py index b9a1e5b78..c88ecadfe 100644 --- a/mapillary_tools/geotag/options.py +++ b/mapillary_tools/geotag/options.py @@ -60,9 +60,13 @@ def from_dict(cls, data: dict[str, T.Any]) -> SourceOption: elif k == "filetypes": kwargs[k] = {types.FileType(t) for t in v} elif k == "source_path": - kwargs.setdefault("source_path", SourcePathOption()).source_path = v + kwargs.setdefault( + "source_path", SourcePathOption(source_path=Path(v)) + ).sourthe_path = Path(v) elif k == "pattern": - kwargs.setdefault("source_path", SourcePathOption()).pattern = v + kwargs.setdefault( + "source_path", SourcePathOption(pattern=v) + ).pattern = v elif k == "interpolation_offset_time": kwargs.setdefault( "interpolation", InterpolationOption() From d6e2881b7f7bf57dc2d46048e440cca051845ac1 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Fri, 27 Jun 2025 12:10:43 -0700 Subject: [PATCH 07/26] improve exiftool_runner --- mapillary_tools/exiftool_runner.py | 29 +++++------------------------ 1 file changed, 5 insertions(+), 24 deletions(-) diff --git a/mapillary_tools/exiftool_runner.py b/mapillary_tools/exiftool_runner.py index 7327b6a63..1d502877d 100644 --- a/mapillary_tools/exiftool_runner.py +++ b/mapillary_tools/exiftool_runner.py @@ -1,7 +1,5 @@ from __future__ import annotations -import platform -import shutil import subprocess import typing as T from pathlib import Path @@ -12,32 +10,15 @@ class ExiftoolRunner: Wrapper around ExifTool to run it in a subprocess """ - def __init__(self, exiftool_path: str | None = None, recursive: bool = False): - if exiftool_path is None: - exiftool_path = self._search_preferred_exiftool_path() - self.exiftool_path = exiftool_path + def __init__(self, exiftool_executable: str = "exiftool", recursive: bool = False): + if exiftool_executable is None: + exiftool_executable = self._search_preferred_exiftool_path() + self.exiftool_executable = exiftool_executable self.recursive = recursive - def _search_preferred_exiftool_path(self) -> str: - system = platform.system() - - if system and system.lower() == "windows": - exiftool_paths = ["exiftool.exe", "exiftool"] - else: - exiftool_paths = ["exiftool", "exiftool.exe"] - - for path in exiftool_paths: - full_path = shutil.which(path) - if full_path: - return path - - # Always return the prefered one, even if it is not found, - # and let the subprocess.run figure out the error later - return exiftool_paths[0] - def _build_args_read_stdin(self) -> list[str]: args: list[str] = [ - self.exiftool_path, + self.exiftool_executable, "-fast", "-q", "-n", # Disable print conversion From fcf227fd7072b75a766d04a83b2c4b451b78ceb1 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Fri, 27 Jun 2025 12:19:03 -0700 Subject: [PATCH 08/26] fix types --- mapillary_tools/exiftool_runner.py | 2 -- mapillary_tools/geotag/geotag_images_from_exiftool.py | 5 ++++- mapillary_tools/geotag/geotag_videos_from_exiftool.py | 5 ++++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/mapillary_tools/exiftool_runner.py b/mapillary_tools/exiftool_runner.py index 1d502877d..4987409c0 100644 --- a/mapillary_tools/exiftool_runner.py +++ b/mapillary_tools/exiftool_runner.py @@ -11,8 +11,6 @@ class ExiftoolRunner: """ def __init__(self, exiftool_executable: str = "exiftool", recursive: bool = False): - if exiftool_executable is None: - exiftool_executable = self._search_preferred_exiftool_path() self.exiftool_executable = exiftool_executable self.recursive = recursive diff --git a/mapillary_tools/geotag/geotag_images_from_exiftool.py b/mapillary_tools/geotag/geotag_images_from_exiftool.py index e5c42ac76..8d56a1915 100644 --- a/mapillary_tools/geotag/geotag_images_from_exiftool.py +++ b/mapillary_tools/geotag/geotag_images_from_exiftool.py @@ -68,7 +68,10 @@ class GeotagImagesFromExifToolRunner(GeotagImagesFromGeneric): def _generate_image_extractors( self, image_paths: T.Sequence[Path] ) -> T.Sequence[ImageExifToolExtractor | types.ErrorMetadata]: - runner = ExiftoolRunner(constants.EXIFTOOL_PATH) + if constants.EXIFTOOL_PATH is None: + runner = ExiftoolRunner() + else: + runner = ExiftoolRunner(constants.EXIFTOOL_PATH) LOG.debug( "Extracting XML from %d images with ExifTool command: %s", diff --git a/mapillary_tools/geotag/geotag_videos_from_exiftool.py b/mapillary_tools/geotag/geotag_videos_from_exiftool.py index 5b7de6839..7334c059c 100644 --- a/mapillary_tools/geotag/geotag_videos_from_exiftool.py +++ b/mapillary_tools/geotag/geotag_videos_from_exiftool.py @@ -66,7 +66,10 @@ class GeotagVideosFromExifToolRunner(GeotagVideosFromGeneric): def _generate_video_extractors( self, video_paths: T.Sequence[Path] ) -> T.Sequence[VideoExifToolExtractor | types.ErrorMetadata]: - runner = ExiftoolRunner(constants.EXIFTOOL_PATH) + if constants.EXIFTOOL_PATH is None: + runner = ExiftoolRunner() + else: + runner = ExiftoolRunner(constants.EXIFTOOL_PATH) LOG.debug( "Extracting XML from %d videos with ExifTool command: %s", From a4574f5ae30b90739c8b44470bc801fa95debc85 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Fri, 27 Jun 2025 15:20:44 -0700 Subject: [PATCH 09/26] wip --- .../geotag/geotag_videos_from_gpx.py | 24 +++++++++++++------ .../geotag/video_extractors/gpx.py | 7 +----- tests/integration/test_process.py | 9 +++++++ 3 files changed, 27 insertions(+), 13 deletions(-) diff --git a/mapillary_tools/geotag/geotag_videos_from_gpx.py b/mapillary_tools/geotag/geotag_videos_from_gpx.py index e2ceaed9c..89a56fd22 100644 --- a/mapillary_tools/geotag/geotag_videos_from_gpx.py +++ b/mapillary_tools/geotag/geotag_videos_from_gpx.py @@ -10,6 +10,7 @@ else: from typing_extensions import override +from .. import types, exceptions from . import options from .base import GeotagVideosFromGeneric from .video_extractors.gpx import GPXVideoExtractor @@ -32,11 +33,20 @@ def __init__( @override def _generate_video_extractors( self, video_paths: T.Sequence[Path] - ) -> T.Sequence[GPXVideoExtractor]: - extractors = [] + ) -> T.Sequence[GPXVideoExtractor | types.ErrorMetadata]: + results: list[GPXVideoExtractor | types.ErrorMetadata] = [] for video_path in video_paths: - resolved_path = self.option.resolve(video_path) - if not resolved_path.is_file(): - pass - extractors.append(GPXVideoExtractor(video_path, resolved_path)) - return extractors + source_path = self.option.resolve(video_path) + if source_path.is_file(): + results.append(GPXVideoExtractor(video_path, source_path)) + else: + results.append( + types.describe_error_metadata( + exceptions.MapillaryVideoGPSNotFoundError( + "GPX file not found for video" + ), + filename=video_path, + filetype=types.FileType.VIDEO, + ) + ) + return results diff --git a/mapillary_tools/geotag/video_extractors/gpx.py b/mapillary_tools/geotag/video_extractors/gpx.py index 560fa4294..27dcf62e9 100644 --- a/mapillary_tools/geotag/video_extractors/gpx.py +++ b/mapillary_tools/geotag/video_extractors/gpx.py @@ -28,12 +28,7 @@ def __init__(self, video_path: Path, gpx_path: Path): @override def extract(self) -> types.VideoMetadataOrError: - try: - gpx_tracks = parse_gpx(self.gpx_path) - except Exception as ex: - raise RuntimeError( - f"Error parsing GPX {self.gpx_path}: {ex.__class__.__name__}: {ex}" - ) + gpx_tracks = parse_gpx(self.gpx_path) if 1 < len(gpx_tracks): LOG.warning( diff --git a/tests/integration/test_process.py b/tests/integration/test_process.py index a3afe06f2..97d5f1633 100644 --- a/tests/integration/test_process.py +++ b/tests/integration/test_process.py @@ -816,3 +816,12 @@ def test_video_process_multiple_videos(setup_data: py.path.local): assert "sample-5s.mp4" in d["filename"] assert 0 == len(find_desc_errors(descs)) assert 3 == len(filter_out_errors(descs)) + + +def test_process_video_geotag_source_gpx(setup_data: py.path.local): + video_path = setup_data.join("videos").join("sample-5s.mp4") + subprocess.run( + f"""{EXECUTABLE} process {PROCESS_FLAGS} --skip_process_errors --video_geotag_source=gpx {video_path}""", + shell=True, + check=True, + ) From 6f317f1892cedc2d3fe3334faeae0ba6e4093d68 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Sat, 28 Jun 2025 17:12:09 -0700 Subject: [PATCH 10/26] add __init__.py --- mapillary_tools/geotag/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 mapillary_tools/geotag/__init__.py diff --git a/mapillary_tools/geotag/__init__.py b/mapillary_tools/geotag/__init__.py new file mode 100644 index 000000000..e69de29bb From fa8afd9d25a99cdc2eb99b3eadc8d6abf712e378 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Sat, 28 Jun 2025 18:12:37 -0700 Subject: [PATCH 11/26] fix geotag gpx --- .../geotag/geotag_images_from_gpx.py | 6 + .../geotag/video_extractors/base.py | 2 +- .../geotag/video_extractors/gpx.py | 123 +++++++++--------- .../geotag/video_extractors/native.py | 40 ++---- tests/integration/test_process.py | 72 +++++++++- 5 files changed, 143 insertions(+), 100 deletions(-) diff --git a/mapillary_tools/geotag/geotag_images_from_gpx.py b/mapillary_tools/geotag/geotag_images_from_gpx.py index 2ec0b9d2a..5e0b9317a 100644 --- a/mapillary_tools/geotag/geotag_images_from_gpx.py +++ b/mapillary_tools/geotag/geotag_images_from_gpx.py @@ -20,6 +20,12 @@ LOG = logging.getLogger(__name__) +class SyncMode: + SYNC = "sync" + STRICT_SYNC = "strict_sync" + RESET = "reset" + + class GeotagImagesFromGPX(GeotagImagesFromGeneric): def __init__( self, diff --git a/mapillary_tools/geotag/video_extractors/base.py b/mapillary_tools/geotag/video_extractors/base.py index 1a2e03b3c..7bf7a34ba 100644 --- a/mapillary_tools/geotag/video_extractors/base.py +++ b/mapillary_tools/geotag/video_extractors/base.py @@ -14,5 +14,5 @@ class BaseVideoExtractor(abc.ABC): def __init__(self, video_path: Path): self.video_path = video_path - def extract(self) -> types.VideoMetadataOrError: + def extract(self) -> types.VideoMetadata: raise NotImplementedError diff --git a/mapillary_tools/geotag/video_extractors/gpx.py b/mapillary_tools/geotag/video_extractors/gpx.py index 27dcf62e9..163f980ab 100644 --- a/mapillary_tools/geotag/video_extractors/gpx.py +++ b/mapillary_tools/geotag/video_extractors/gpx.py @@ -1,9 +1,9 @@ from __future__ import annotations import dataclasses -import datetime import logging import sys +import enum import typing as T from pathlib import Path @@ -12,7 +12,7 @@ else: from typing_extensions import override -from ... import geo, telemetry, types +from ... import geo, telemetry, types, exceptions from ..utils import parse_gpx from .base import BaseVideoExtractor from .native import NativeVideoExtractor @@ -21,101 +21,96 @@ LOG = logging.getLogger(__name__) +class SyncMode(enum.Enum): + # Sync by video GPS timestamps if found, otherwise rebase + SYNC = "sync" + # Sync by video GPS timestamps, and throw if not found + STRICT_SYNC = "strict_sync" + # Rebase all GPX timestamps to start from 0 + REBASE = "rebase" + + class GPXVideoExtractor(BaseVideoExtractor): - def __init__(self, video_path: Path, gpx_path: Path): + def __init__( + self, video_path: Path, gpx_path: Path, sync_mode: SyncMode = SyncMode.SYNC + ): self.video_path = video_path self.gpx_path = gpx_path + self.sync_mode = sync_mode @override - def extract(self) -> types.VideoMetadataOrError: + def extract(self) -> types.VideoMetadata: gpx_tracks = parse_gpx(self.gpx_path) if 1 < len(gpx_tracks): LOG.warning( - "Found %s tracks in the GPX file %s. Will merge points in all the tracks as a single track for interpolation", - len(gpx_tracks), - self.gpx_path, + f"Found {len(gpx_tracks)} tracks in the GPX file {self.gpx_path}. Will merge points in all the tracks as a single track for interpolation" ) gpx_points: T.Sequence[geo.Point] = sum(gpx_tracks, []) native_extractor = NativeVideoExtractor(self.video_path) - video_metadata_or_error = native_extractor.extract() - - if isinstance(video_metadata_or_error, types.ErrorMetadata): + try: + native_video_metadata = native_extractor.extract() + except exceptions.MapillaryVideoGPSNotFoundError as ex: + if self.sync_mode is SyncMode.STRICT_SYNC: + raise ex self._rebase_times(gpx_points) return types.VideoMetadata( - filename=video_metadata_or_error.filename, - filetype=video_metadata_or_error.filetype or types.FileType.VIDEO, + filename=self.video_path, + filetype=types.FileType.VIDEO, points=gpx_points, ) - video_metadata = video_metadata_or_error - - offset = self._synx_gpx_by_first_gps_timestamp( - gpx_points, video_metadata.points - ) - - self._rebase_times(gpx_points, offset=offset) + if self.sync_mode is SyncMode.REBASE: + self._rebase_times(gpx_points) + else: + offset = self._gpx_offset(gpx_points, native_video_metadata.points) + self._rebase_times(gpx_points, offset=offset) - return dataclasses.replace(video_metadata_or_error, points=gpx_points) + return dataclasses.replace(native_video_metadata, points=gpx_points) - @staticmethod - def _rebase_times(points: T.Sequence[geo.Point], offset: float = 0.0): + @classmethod + def _rebase_times(cls, points: T.Sequence[geo.Point], offset: float = 0.0) -> None: """ - Make point times start from 0 + Rebase point times to start from **offset** """ if points: first_timestamp = points[0].time for p in points: p.time = (p.time - first_timestamp) + offset - return points - def _synx_gpx_by_first_gps_timestamp( - self, gpx_points: T.Sequence[geo.Point], video_gps_points: T.Sequence[geo.Point] + @classmethod + def _gpx_offset( + cls, gpx_points: T.Sequence[geo.Point], video_gps_points: T.Sequence[geo.Point] ) -> float: + """ + Calculate the offset that needs to be applied to the GPX points to sync with the video GPS points. + + >>> gpx_points = [geo.Point(time=5, lat=1, lon=1, alt=None, angle=None)] + >>> GPXVideoExtractor._gpx_offset(gpx_points, gpx_points) + 0.0 + >>> GPXVideoExtractor._gpx_offset(gpx_points, []) + 0.0 + >>> GPXVideoExtractor._gpx_offset([], gpx_points) + 0.0 + """ offset: float = 0.0 - if not gpx_points: - return offset - - first_gpx_dt = datetime.datetime.fromtimestamp( - gpx_points[0].time, tz=datetime.timezone.utc - ) - LOG.info("First GPX timestamp: %s", first_gpx_dt) - - if not video_gps_points: - LOG.warning( - "Skip GPX synchronization because no GPS found in video %s", - self.video_path, - ) + if not gpx_points or not video_gps_points: return offset - first_gps_point = video_gps_points[0] - if isinstance(first_gps_point, telemetry.GPSPoint): - if first_gps_point.epoch_time is not None: - first_gps_dt = datetime.datetime.fromtimestamp( - first_gps_point.epoch_time, tz=datetime.timezone.utc - ) - LOG.info("First GPS timestamp: %s", first_gps_dt) - offset = gpx_points[0].time - first_gps_point.epoch_time - if offset: - LOG.warning( - "Found offset between GPX %s and video GPS timestamps %s: %s seconds", - first_gpx_dt, - first_gps_dt, - offset, - ) - else: - LOG.info( - "GPX and GPS are perfectly synchronized (all starts from %s)", - first_gpx_dt, - ) - else: - LOG.warning( - "Skip GPX synchronization because no GPS epoch time found in video %s", - self.video_path, - ) + gps_epoch_time: float | None = None + gps_point = video_gps_points[0] + if isinstance(gps_point, telemetry.GPSPoint): + if gps_point.epoch_time is not None: + gps_epoch_time = gps_point.epoch_time + elif isinstance(gps_point, telemetry.CAMMGPSPoint): + if gps_point.time_gps_epoch is not None: + gps_epoch_time = gps_point.time_gps_epoch + + if gps_epoch_time is not None: + offset = gpx_points[0].time - gps_epoch_time return offset diff --git a/mapillary_tools/geotag/video_extractors/native.py b/mapillary_tools/geotag/video_extractors/native.py index b30d3160e..ca434ced3 100644 --- a/mapillary_tools/geotag/video_extractors/native.py +++ b/mapillary_tools/geotag/video_extractors/native.py @@ -17,7 +17,7 @@ class GoProVideoExtractor(BaseVideoExtractor): @override - def extract(self) -> types.VideoMetadataOrError: + def extract(self) -> types.VideoMetadata: with self.video_path.open("rb") as fp: gopro_info = gpmf_parser.extract_gopro_info(fp) @@ -29,23 +29,13 @@ def extract(self) -> types.VideoMetadataOrError: gps_points = gopro_info.gps assert gps_points is not None, "must have GPS data extracted" if not gps_points: - # Instead of raising an exception, return error metadata to tell the file type - ex: exceptions.MapillaryDescriptionError = ( - exceptions.MapillaryGPXEmptyError("Empty GPS data found") - ) - return types.describe_error_metadata( - ex, self.video_path, filetype=types.FileType.GOPRO - ) + raise exceptions.MapillaryGPXEmptyError("Empty GPS data found") gps_points = T.cast( T.List[telemetry.GPSPoint], gpmf_gps_filter.remove_noisy_points(gps_points) ) if not gps_points: - # Instead of raising an exception, return error metadata to tell the file type - ex = exceptions.MapillaryGPSNoiseError("GPS is too noisy") - return types.describe_error_metadata( - ex, self.video_path, filetype=types.FileType.GOPRO - ) + raise exceptions.MapillaryGPSNoiseError("GPS is too noisy") video_metadata = types.VideoMetadata( filename=self.video_path, @@ -61,7 +51,7 @@ def extract(self) -> types.VideoMetadataOrError: class CAMMVideoExtractor(BaseVideoExtractor): @override - def extract(self) -> types.VideoMetadataOrError: + def extract(self) -> types.VideoMetadata: with self.video_path.open("rb") as fp: camm_info = camm_parser.extract_camm_info(fp) @@ -71,13 +61,7 @@ def extract(self) -> types.VideoMetadataOrError: ) if not camm_info.gps and not camm_info.mini_gps: - # Instead of raising an exception, return error metadata to tell the file type - ex: exceptions.MapillaryDescriptionError = ( - exceptions.MapillaryGPXEmptyError("Empty GPS data found") - ) - return types.describe_error_metadata( - ex, self.video_path, filetype=types.FileType.CAMM - ) + raise exceptions.MapillaryGPXEmptyError("Empty GPS data found") return types.VideoMetadata( filename=self.video_path, @@ -91,7 +75,7 @@ def extract(self) -> types.VideoMetadataOrError: class BlackVueVideoExtractor(BaseVideoExtractor): @override - def extract(self) -> types.VideoMetadataOrError: + def extract(self) -> types.VideoMetadata: with self.video_path.open("rb") as fp: blackvue_info = blackvue_parser.extract_blackvue_info(fp) @@ -101,19 +85,13 @@ def extract(self) -> types.VideoMetadataOrError: ) if not blackvue_info.gps: - # Instead of raising an exception, return error metadata to tell the file type - ex: exceptions.MapillaryDescriptionError = ( - exceptions.MapillaryGPXEmptyError("Empty GPS data found") - ) - return types.describe_error_metadata( - ex, self.video_path, filetype=types.FileType.BLACKVUE - ) + raise exceptions.MapillaryGPXEmptyError("Empty GPS data found") video_metadata = types.VideoMetadata( filename=self.video_path, filesize=utils.get_file_size(self.video_path), filetype=types.FileType.BLACKVUE, - points=blackvue_info.gps or [], + points=blackvue_info.gps, make=blackvue_info.make, model=blackvue_info.model, ) @@ -127,7 +105,7 @@ def __init__(self, video_path: Path, filetypes: set[types.FileType] | None = Non self.filetypes = filetypes @override - def extract(self) -> types.VideoMetadataOrError: + def extract(self) -> types.VideoMetadata: ft = self.filetypes extractor: BaseVideoExtractor diff --git a/tests/integration/test_process.py b/tests/integration/test_process.py index 97d5f1633..7ff5142f9 100644 --- a/tests/integration/test_process.py +++ b/tests/integration/test_process.py @@ -7,6 +7,7 @@ import exifread import py.path import pytest +import shlex from .fixtures import ( assert_contains_image_descs, @@ -818,10 +819,73 @@ def test_video_process_multiple_videos(setup_data: py.path.local): assert 3 == len(filter_out_errors(descs)) -def test_process_video_geotag_source_gpx(setup_data: py.path.local): - video_path = setup_data.join("videos").join("sample-5s.mp4") +def run_process(params: list[str]): subprocess.run( - f"""{EXECUTABLE} process {PROCESS_FLAGS} --skip_process_errors --video_geotag_source=gpx {video_path}""", - shell=True, + [*shlex.split(EXECUTABLE), "process", *params], check=True, ) + + +def run_process_for_descs(params: list[str]): + with tempfile.NamedTemporaryFile(suffix=".json") as desc_file: + run_process( + [ + "--skip_process_errors", + *["--desc_path", str(desc_file.name)], + *params, + ], + ) + + with open(desc_file.name, "r") as fp: + fp.seek(0) + return json.load(fp) + + +def test_process_video_geotag_source_gpx_specified(setup_data: py.path.local): + video_path = setup_data.join("videos").join("sample-5s.mp4") + gpx_file = setup_data.join("gpx").join("sf_30km_h.gpx") + + descs = run_process_for_descs( + [ + *[ + "--video_geotag_source", + json.dumps({"source": "gpx", "source_path": str(gpx_file)}), + ], + str(video_path), + ] + ) + + assert len(descs) == 1 + assert len(descs[0]["MAPGPSTrack"]) > 0 + + +def test_process_video_geotag_source_gpx_not_found(setup_data: py.path.local): + video_path = setup_data.join("videos").join("sample-5s.mp4") + descs = run_process_for_descs( + [ + *["--video_geotag_source", "gpx"], + str(video_path), + ] + ) + + assert len(descs) == 1 + assert descs[0]["error"]["type"] == "MapillaryVideoGPSNotFoundError" + + +def test_process_video_geotag_source_gopro_gpx_specified(setup_data: py.path.local): + video_path = setup_data.join("gopro_data").join("max-360mode.mp4") + gpx_file = setup_data.join("gpx").join("sf_30km_h.gpx") + + descs = run_process_for_descs( + [ + *[ + "--video_geotag_source", + json.dumps({"source": "gpx", "source_path": str(gpx_file)}), + ], + str(video_path), + ] + ) + + assert len(descs) == 1 + assert descs[0]["MAPDeviceMake"] == "GoPro" + assert descs[0]["MAPDeviceModel"] == "GoPro Max" From 1683bc1a8cc192cc9b73cfebf0f0e24cc1de3d9b Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Sat, 28 Jun 2025 19:12:14 -0700 Subject: [PATCH 12/26] refactor test_process.py --- tests/integration/fixtures.py | 36 ++- tests/integration/test_process.py | 469 +++++++----------------------- 2 files changed, 132 insertions(+), 373 deletions(-) diff --git a/tests/integration/fixtures.py b/tests/integration/fixtures.py index 19e6fc2b2..d87a15d81 100644 --- a/tests/integration/fixtures.py +++ b/tests/integration/fixtures.py @@ -2,6 +2,7 @@ import json import os +import shlex import shutil import subprocess import tempfile @@ -148,15 +149,18 @@ def run_exiftool(setup_data: py.path.local) -> py.path.local: def run_exiftool_and_generate_geotag_args( - test_data_dir: py.path.local, run_args: str -) -> str: + test_data_dir: py.path.local, run_args: list[str] +) -> list[str]: if not IS_EXIFTOOL_INSTALLED: pytest.skip("exiftool not installed") + exiftool_outuput_dir = run_exiftool(test_data_dir) - exiftool_params = ( - f"--geotag_source exiftool_xml --geotag_source_path {exiftool_outuput_dir}" - ) - return f"{run_args} {exiftool_params}" + return [ + *run_args, + "--geotag_source=exiftool_xml", + "--geotag_source_path", + str(exiftool_outuput_dir), + ] with open("schema/image_description_schema.json") as fp: @@ -362,3 +366,23 @@ def assert_contains_image_descs(haystack: Path | list[dict], needle: Path | list def assert_same_image_descs(left: Path | list[dict], right: Path | list[dict]): assert_contains_image_descs(left, right) assert_contains_image_descs(right, left) + + +def run_command(params: list[str], command: str): + subprocess.run([*shlex.split(EXECUTABLE), command, *params], check=True) + + +def run_command_for_descs(params: list[str], command: str): + with tempfile.NamedTemporaryFile(suffix=".json") as desc_file: + run_command( + [ + "--skip_process_errors", + *["--desc_path", str(desc_file.name)], + *params, + ], + command, + ) + + with open(desc_file.name, "r") as fp: + fp.seek(0) + return json.load(fp) diff --git a/tests/integration/test_process.py b/tests/integration/test_process.py index 7ff5142f9..1752bdd57 100644 --- a/tests/integration/test_process.py +++ b/tests/integration/test_process.py @@ -1,27 +1,22 @@ import datetime +import functools import json import subprocess -import tempfile from pathlib import Path -import exifread import py.path -import pytest -import shlex from .fixtures import ( assert_contains_image_descs, EXECUTABLE, - IS_FFMPEG_INSTALLED, + run_command, + run_command_for_descs, run_exiftool_and_generate_geotag_args, - setup_config, setup_data, validate_and_extract_zip, ) -PROCESS_FLAGS = "" - _DEFAULT_EXPECTED_DESCS = { "DSC00001.JPG": { "filename": "DSC00001.JPG", @@ -76,6 +71,9 @@ } +run_process_for_descs = functools.partial(run_command_for_descs, command="process") + + def _local_to_utc(ct: str): return ( datetime.datetime.fromisoformat(ct) @@ -91,17 +89,16 @@ def test_basic(): def test_process_images_with_defaults( - setup_data: py.path.local, - use_exiftool: bool = False, + setup_data: py.path.local, use_exiftool: bool = False ): - args = f"{EXECUTABLE} process --file_types=image {PROCESS_FLAGS} {setup_data}" + args = ["--file_types=image", str(setup_data)] if use_exiftool: args = run_exiftool_and_generate_geotag_args(setup_data, args) - x = subprocess.run(args, shell=True) - assert x.returncode == 0, x.stderr + descs = run_process_for_descs(args) + assert_contains_image_descs( - Path(setup_data, "mapillary_image_description.json"), + descs, [ { **_DEFAULT_EXPECTED_DESCS["DSC00001.JPG"], @@ -122,13 +119,15 @@ def test_process_images_with_defaults_with_exiftool(setup_data: py.path.local): def test_time_with_offset(setup_data: py.path.local, use_exiftool: bool = False): - args = f"{EXECUTABLE} process --file_types=image {PROCESS_FLAGS} {setup_data} --offset_time=2.5" + args = ["--file_types=image", "--offset_time=2.5", str(setup_data)] + if use_exiftool: args = run_exiftool_and_generate_geotag_args(setup_data, args) - x = subprocess.run(args, shell=True) - assert x.returncode == 0, x.stderr + + descs = run_process_for_descs(args) + assert_contains_image_descs( - Path(setup_data, "mapillary_image_description.json"), + descs, [ { **_DEFAULT_EXPECTED_DESCS["DSC00001.JPG"], @@ -148,13 +147,13 @@ def test_time_with_offset(setup_data: py.path.local, use_exiftool: bool = False) ], ) - args = f"{EXECUTABLE} process --file_types=image {PROCESS_FLAGS} {setup_data} --offset_time=-1.0" + args = ["--file_types=image", str(setup_data), "--offset_time=-1.0"] if use_exiftool: args = run_exiftool_and_generate_geotag_args(setup_data, args) - x = subprocess.run(args, shell=True) - assert x.returncode == 0, x.stderr + + descs = run_process_for_descs(args) assert_contains_image_descs( - Path(setup_data, "mapillary_image_description.json"), + descs, [ { **_DEFAULT_EXPECTED_DESCS["DSC00001.JPG"], @@ -182,11 +181,17 @@ def test_time_with_offset_with_exiftool(setup_data: py.path.local): def test_process_images_with_overwrite_all_EXIF_tags( setup_data: py.path.local, use_exiftool: bool = False ): - args = f"{EXECUTABLE} process --file_types=image --overwrite_all_EXIF_tags --offset_time=2.5 {PROCESS_FLAGS} {setup_data}" + args = [ + "--file_types=image", + "--overwrite_all_EXIF_tags", + "--offset_time=2.5", + str(setup_data), + ] if use_exiftool: args = run_exiftool_and_generate_geotag_args(setup_data, args) - x = subprocess.run(args, shell=True) - assert x.returncode == 0, x.stderr + + actual_descs = run_process_for_descs(args) + expected_descs = [ { **_DEFAULT_EXPECTED_DESCS["DSC00001.JPG"], # type: ignore @@ -205,19 +210,15 @@ def test_process_images_with_overwrite_all_EXIF_tags( }, ] assert_contains_image_descs( - Path(setup_data, "mapillary_image_description.json"), + actual_descs, expected_descs, ) - args = f"{EXECUTABLE} process --file_types=image {PROCESS_FLAGS} {setup_data}" + args = ["--file_types=image", str(setup_data)] if use_exiftool: args = run_exiftool_and_generate_geotag_args(setup_data, args) - x = subprocess.run(args, shell=True) - assert x.returncode == 0, x.stderr - assert_contains_image_descs( - Path(setup_data, "mapillary_image_description.json"), - expected_descs, - ) + descs = run_process_for_descs(args) + assert_contains_image_descs(descs, expected_descs) def test_process_images_with_overwrite_all_EXIF_tags_with_exiftool( @@ -229,14 +230,15 @@ def test_process_images_with_overwrite_all_EXIF_tags_with_exiftool( def test_angle_with_offset(setup_data: py.path.local, use_exiftool: bool = False): - args = f"{EXECUTABLE} process --file_types=image {PROCESS_FLAGS} {setup_data} --offset_angle=2.5" + args = ["--file_types=image", str(setup_data), "--offset_angle=2.5"] + if use_exiftool: args = run_exiftool_and_generate_geotag_args(setup_data, args) - x = subprocess.run(args, shell=True) - assert x.returncode == 0, x.stderr + + descs = run_process_for_descs(args) assert_contains_image_descs( - Path(setup_data, "mapillary_image_description.json"), + descs, [ { **_DEFAULT_EXPECTED_DESCS["DSC00001.JPG"], @@ -272,10 +274,10 @@ def test_angle_with_offset_with_exiftool(setup_data: py.path.local): def test_parse_adobe_coordinates(setup_data: py.path.local): - args = f"{EXECUTABLE} process --file_types=image {PROCESS_FLAGS} {setup_data}/adobe_coords" - x = subprocess.run(args, shell=True) + args = ["--file_types=image", str(setup_data.join("adobe_coords"))] + descs = run_process_for_descs(args) assert_contains_image_descs( - Path(setup_data, "adobe_coords/mapillary_image_description.json"), + descs, [ { "filename": str(Path(setup_data, "adobe_coords", "adobe_coords.jpg")), @@ -294,22 +296,12 @@ def test_parse_adobe_coordinates(setup_data: py.path.local): def test_zip(tmpdir: py.path.local, setup_data: py.path.local): zip_dir = tmpdir.mkdir("zip_dir") - x = subprocess.run( - f"{EXECUTABLE} process --file_types=image {PROCESS_FLAGS} {setup_data}", - shell=True, - ) - assert x.returncode == 0, x.stderr - x = subprocess.run( - f"{EXECUTABLE} zip {setup_data} {zip_dir}", - shell=True, - ) - assert x.returncode == 0, x.stderr + run_command([str(setup_data), str(zip_dir)], command="zip") assert 0 < len(zip_dir.listdir()) for file in zip_dir.listdir(): validate_and_extract_zip(Path(file)) -@pytest.mark.usefixtures("setup_config") def test_process_boolean_options(setup_data: py.path.local): boolean_options = [ "--interpolate_directions", @@ -321,17 +313,20 @@ def test_process_boolean_options(setup_data: py.path.local): "--skip_subfolders", ] for option in boolean_options: - x = subprocess.run( - f"{EXECUTABLE} process --file_types=image {PROCESS_FLAGS} {option} {setup_data}", - shell=True, + run_process_for_descs( + [ + "--file_types=image", + option, + str(setup_data), + ] ) - assert x.returncode == 0, x.stderr - all_options = " ".join(boolean_options) - x = subprocess.run( - f"{EXECUTABLE} process --file_types=image {PROCESS_FLAGS} {all_options} {setup_data}", - shell=True, + run_process_for_descs( + [ + "--file_types=image", + *boolean_options, + str(setup_data), + ] ) - assert x.returncode == 0, x.stderr GPX_CONTENT = """ @@ -378,18 +373,18 @@ def test_geotagging_images_from_gpx(setup_data: py.path.local): fp.write(GPX_CONTENT) images = setup_data.join("images") - x = subprocess.run( - f"""{EXECUTABLE} process {PROCESS_FLAGS} \ - --file_types=image \ - --geotag_source=gpx \ - --geotag_source_path={gpx_file} \ - --skip_process_errors \ - {images} -""", - shell=True, + descs = run_process_for_descs( + [ + "--file_types=image", + "--geotag_source=gpx", + "--geotag_source_path", + str(gpx_file), + str(images), + ] ) + assert_contains_image_descs( - Path(images, "mapillary_image_description.json"), + descs, [ { **_DEFAULT_EXPECTED_DESCS["DSC00001.JPG"], @@ -422,13 +417,18 @@ def test_geotagging_images_from_gpx_with_offset(setup_data: py.path.local): with gpx_file.open("w") as fp: fp.write(GPX_CONTENT) - x = subprocess.run( - f"{EXECUTABLE} --verbose process --file_types=image {PROCESS_FLAGS} {setup_data} --geotag_source gpx --geotag_source_path {gpx_file} --interpolation_offset_time=-20 --skip_process_errors", - shell=True, + descs = run_process_for_descs( + [ + "--file_types=image", + "--geotag_source=gpx", + "--geotag_source_path", + str(gpx_file), + str(setup_data), + "--interpolation_offset_time=-20", + ] ) - assert x.returncode == 0, x.stderr assert_contains_image_descs( - Path(setup_data, "mapillary_image_description.json"), + descs, [ { **_DEFAULT_EXPECTED_DESCS["DSC00001.JPG"], @@ -462,13 +462,18 @@ def test_geotagging_images_from_gpx_use_gpx_start_time(setup_data: py.path.local gpx_file = setup_data.join("test.gpx") with gpx_file.open("w") as fp: fp.write(GPX_CONTENT) - x = subprocess.run( - f"{EXECUTABLE} process --file_types=image {PROCESS_FLAGS} {setup_data} --geotag_source gpx --interpolation_use_gpx_start_time --geotag_source_path {gpx_file} --skip_process_errors", - shell=True, + descs = run_process_for_descs( + [ + "--file_types=image", + *["--geotag_source", "gpx"], + "--interpolation_use_gpx_start_time", + "--geotag_source_path", + str(gpx_file), + str(setup_data), + ] ) - assert x.returncode == 0, x.stderr assert_contains_image_descs( - Path(setup_data, "mapillary_image_description.json"), + descs, [ { **_DEFAULT_EXPECTED_DESCS["DSC00001.JPG"], @@ -504,13 +509,19 @@ def test_geotagging_images_from_gpx_use_gpx_start_time_with_offset( gpx_file = setup_data.join("test.gpx") with gpx_file.open("w") as fp: fp.write(GPX_CONTENT) - x = subprocess.run( - f"{EXECUTABLE} process --file_types=image {PROCESS_FLAGS} {setup_data} --geotag_source gpx --interpolation_use_gpx_start_time --geotag_source_path {gpx_file} --interpolation_offset_time=100 --skip_process_errors", - shell=True, + descs = run_process_for_descs( + [ + "--file_types=image", + str(setup_data), + *["--geotag_source", "gpx"], + "--interpolation_use_gpx_start_time", + "--geotag_source_path", + str(gpx_file), + "--interpolation_offset_time=100", + ] ) - assert x.returncode == 0, x.stderr assert_contains_image_descs( - Path(setup_data, "mapillary_image_description.json"), + descs, [ { **_DEFAULT_EXPECTED_DESCS["DSC00001.JPG"], @@ -542,14 +553,7 @@ def test_geotagging_images_from_gpx_use_gpx_start_time_with_offset( def test_process_filetypes(setup_data: py.path.local): video_dir = setup_data.join("gopro_data") - x = subprocess.run( - f"{EXECUTABLE} --verbose process {PROCESS_FLAGS} --skip_process_errors {video_dir}", - shell=True, - ) - assert x.returncode == 0, x.stderr - desc_path = video_dir.join("mapillary_image_description.json") - with open(desc_path) as fp: - descs = json.load(fp) + descs = run_process_for_descs([str(video_dir)]) assert 2 == len(descs) assert 1 == len(find_desc_errors(descs)) assert 1 == len(filter_out_errors(descs)) @@ -558,289 +562,19 @@ def test_process_filetypes(setup_data: py.path.local): def test_process_unsupported_filetypes(setup_data: py.path.local): video_dir = setup_data.join("gopro_data") for filetypes in ["blackvue"]: - x = subprocess.run( - f"{EXECUTABLE} --verbose process --filetypes={filetypes} --geotag_source=native {PROCESS_FLAGS} --skip_process_errors {video_dir}", - shell=True, + descs = run_process_for_descs( + ["--filetypes", filetypes, "--geotag_source=native", str(video_dir)] ) - assert x.returncode == 0, x.stderr - desc_path = video_dir.join("mapillary_image_description.json") - with open(desc_path) as fp: - descs = json.load(fp) assert 2 == len(descs) assert 2 == len(find_desc_errors(descs)) for filetypes in ["image"]: - x = subprocess.run( - f"{EXECUTABLE} --verbose process --filetypes={filetypes} --geotag_source=native {PROCESS_FLAGS} --skip_process_errors {video_dir}", - shell=True, + descs = run_process_for_descs( + ["--filetypes", filetypes, "--geotag_source=native", str(video_dir)] ) - assert x.returncode == 0, x.stderr - desc_path = video_dir.join("mapillary_image_description.json") - with open(desc_path) as fp: - descs = json.load(fp) assert 0 == len(descs) -def test_sample_video_relpath(): - if not IS_FFMPEG_INSTALLED: - pytest.skip("skip because ffmpeg not installed") - - with tempfile.TemporaryDirectory() as dir: - x = subprocess.run( - f"{EXECUTABLE} sample_video --rerun tests/data/gopro_data/hero8.mp4 {dir}", - shell=True, - ) - assert x.returncode == 0, x.stderr - - -def test_sample_video_relpath_dir(): - if not IS_FFMPEG_INSTALLED: - pytest.skip("skip because ffmpeg not installed") - - with tempfile.TemporaryDirectory() as dir: - x = subprocess.run( - f"{EXECUTABLE} sample_video --rerun --video_start_time 2021_10_10_10_10_10_123 tests/integration {dir}", - shell=True, - ) - assert x.returncode == 0, x.stderr - - -def test_sample_video_without_video_time(setup_data: py.path.local): - if not IS_FFMPEG_INSTALLED: - pytest.skip("skip because ffmpeg not installed") - - video_dir = setup_data.join("videos") - root_sample_dir = video_dir.join("mapillary_sampled_video_frames") - - for input_path in [video_dir, video_dir.join("sample-5s.mp4")]: - x = subprocess.run( - f"{EXECUTABLE} sample_video --video_sample_interval=2 --video_sample_distance=-1 --video_sample_distance=-1 --rerun {input_path}", - shell=True, - ) - assert x.returncode == 7, x.stderr - if root_sample_dir.exists(): - assert len(root_sample_dir.listdir()) == 0 - - x = subprocess.run( - f"{EXECUTABLE} sample_video --video_sample_interval=2 --video_sample_distance=-1 --skip_sample_errors --rerun {input_path}", - shell=True, - ) - assert x.returncode == 0, x.stderr - if root_sample_dir.exists(): - assert len(root_sample_dir.listdir()) == 0 - - x = subprocess.run( - f"{EXECUTABLE} sample_video --video_sample_interval=2 --video_sample_distance=-1 --video_start_time 2021_10_10_10_10_10_123 --rerun {input_path}", - shell=True, - ) - assert x.returncode == 0, x.stderr - assert len(root_sample_dir.listdir()) == 1 - samples = root_sample_dir.join("sample-5s.mp4").listdir() - samples.sort() - times = [] - for s in samples: - with s.open("rb") as fp: - tags = exifread.process_file(fp) - times.append(tags["EXIF DateTimeOriginal"].values) - assert ( - "2021:10:10 10:10:10", - "2021:10:10 10:10:12", - "2021:10:10 10:10:14", - ) == tuple(times) - - -def test_video_process(setup_data: py.path.local): - if not IS_FFMPEG_INSTALLED: - pytest.skip("skip because ffmpeg not installed") - - video_dir = setup_data.join("videos") - gpx_file = setup_data.join("gpx").join("sf_30km_h.gpx") - gpx_start_time = "2025_03_14_07_00_00_000" - gpx_end_time = "2025_03_14_07_01_33_624" - video_start_time = "2025_03_14_07_00_00_000" - desc_path = video_dir.join("my_samples").join("mapillary_image_description.json") - x = subprocess.run( - f"""{EXECUTABLE} --verbose video_process \ - {PROCESS_FLAGS} \ - --video_sample_interval=2 \ - --video_sample_distance=-1 \ - --skip_process_errors \ - --video_start_time {video_start_time} \ - --geotag_source gpx \ - --geotag_source_path {gpx_file} {video_dir} {video_dir.join("my_samples")} -""", - shell=True, - ) - assert x.returncode == 0, x.stderr - with open(desc_path) as fp: - descs = json.load(fp) - assert 0 == len(find_desc_errors(descs)) - assert 3 == len(filter_out_errors(descs)) - - -def test_video_process_sample_with_multiple_distances(setup_data: py.path.local): - if not IS_FFMPEG_INSTALLED: - pytest.skip("skip because ffmpeg not installed") - - video_dir = setup_data.join("gopro_data") - desc_path = video_dir.join("my_samples").join("mapillary_image_description.json") - for distance in [0, 2.4, 100]: - x = subprocess.run( - f"{EXECUTABLE} --verbose video_process --video_sample_distance={distance} --rerun {PROCESS_FLAGS} {video_dir} {video_dir.join('my_samples')}", - shell=True, - ) - assert x.returncode == 0, x.stderr - with open(desc_path) as fp: - descs = json.load(fp) - if distance == 100: - assert 1 == len(descs) - else: - assert len(descs) > 1 - - -def test_video_process_sample_with_distance(setup_data: py.path.local): - if not IS_FFMPEG_INSTALLED: - pytest.skip("skip because ffmpeg not installed") - - video_dir = setup_data.join("gopro_data") - sample_dir = Path(setup_data, "gopro_data", "my_samples") - desc_path = Path(sample_dir, "mapillary_image_description.json") - for option in [ - "--video_sample_distance=6", - "--video_sample_distance=6 --video_sample_interval=-2", - ]: - x = subprocess.run( - f"{EXECUTABLE} --verbose video_process {option} {PROCESS_FLAGS} {video_dir} {video_dir.join('my_samples')}", - shell=True, - ) - assert x.returncode == 0, x.stderr - assert_contains_image_descs( - desc_path, - [ - { - "filename": str( - Path( - sample_dir, - "max-360mode.mp4", - "max-360mode_0_000001.jpg", - ) - ), - "filetype": "image", - "MAPLatitude": 33.1266719, - "MAPLongitude": -117.3273063, - "MAPCaptureTime": "2019_11_18_15_44_47_862", - "MAPAltitude": -22.18, - "MAPCompassHeading": { - "TrueHeading": 313.68, - "MagneticHeading": 313.68, - }, - "MAPSequenceUUID": "0", - "MAPDeviceMake": "GoPro", - "MAPDeviceModel": "GoPro Max", - "MAPOrientation": 1, - }, - { - "filename": str( - Path( - sample_dir, - "max-360mode.mp4", - "max-360mode_0_000002.jpg", - ) - ), - "filetype": "image", - "MAPLatitude": 33.1267206, - "MAPLongitude": -117.3273345, - "MAPCaptureTime": "2019_11_18_15_44_53_159", - "MAPAltitude": -21.91, - "MAPCompassHeading": { - "TrueHeading": 330.82, - "MagneticHeading": 330.82, - }, - "MAPSequenceUUID": "0", - "MAPDeviceMake": "GoPro", - "MAPDeviceModel": "GoPro Max", - "MAPOrientation": 1, - }, - { - "filename": str( - Path( - sample_dir, - "max-360mode.mp4", - "max-360mode_0_000003.jpg", - ) - ), - "filetype": "image", - "MAPLatitude": 33.1267702, - "MAPLongitude": -117.3273612, - "MAPCaptureTime": "2019_11_18_15_44_58_289", - "MAPAltitude": -22.58, - "MAPCompassHeading": { - "TrueHeading": 10.54, - "MagneticHeading": 10.54, - }, - "MAPSequenceUUID": "0", - "MAPDeviceMake": "GoPro", - "MAPDeviceModel": "GoPro Max", - "MAPOrientation": 1, - }, - ], - ) - - -def test_video_process_multiple_videos(setup_data: py.path.local): - if not IS_FFMPEG_INSTALLED: - pytest.skip("skip because ffmpeg not installed") - - desc_path = setup_data.join("my_samples").join("mapillary_image_description.json") - sub_folder = setup_data.join("video_sub_folder").mkdir() - video_path = setup_data.join("videos").join("sample-5s.mp4") - video_path.copy(sub_folder) - gpx_file = setup_data.join("gpx").join("sf_30km_h.gpx") - gpx_start_time = "2025_03_14_07_00_00_000" - gpx_end_time = "2025_03_14_07_01_33_624" - x = subprocess.run( - f"""{EXECUTABLE} video_process {PROCESS_FLAGS} \ - --video_sample_interval=2 \ - --video_sample_distance=-1 \ - --video_start_time={gpx_start_time} \ - --geotag_source=gpx \ - --geotag_source_path={gpx_file} \ - {video_path} {setup_data.join("my_samples")} -""", - shell=True, - ) - assert x.returncode == 0, x.stderr - with open(desc_path) as fp: - descs = json.load(fp) - for d in descs: - assert Path(d["filename"]).is_file(), d["filename"] - assert "sample-5s.mp4" in d["filename"] - assert 0 == len(find_desc_errors(descs)) - assert 3 == len(filter_out_errors(descs)) - - -def run_process(params: list[str]): - subprocess.run( - [*shlex.split(EXECUTABLE), "process", *params], - check=True, - ) - - -def run_process_for_descs(params: list[str]): - with tempfile.NamedTemporaryFile(suffix=".json") as desc_file: - run_process( - [ - "--skip_process_errors", - *["--desc_path", str(desc_file.name)], - *params, - ], - ) - - with open(desc_file.name, "r") as fp: - fp.seek(0) - return json.load(fp) - - def test_process_video_geotag_source_gpx_specified(setup_data: py.path.local): video_path = setup_data.join("videos").join("sample-5s.mp4") gpx_file = setup_data.join("gpx").join("sf_30km_h.gpx") @@ -889,3 +623,4 @@ def test_process_video_geotag_source_gopro_gpx_specified(setup_data: py.path.loc assert len(descs) == 1 assert descs[0]["MAPDeviceMake"] == "GoPro" assert descs[0]["MAPDeviceModel"] == "GoPro Max" + assert len(descs[0]["MAPGPSTrack"]) > 0 From 8fbeb928a8c00b0c57d372f12bbab0a86a4c83e4 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Sat, 28 Jun 2025 19:57:39 -0700 Subject: [PATCH 13/26] git add tests/integration/test_video_process.py --- tests/integration/test_video_process.py | 275 ++++++++++++++++++++++++ 1 file changed, 275 insertions(+) create mode 100644 tests/integration/test_video_process.py diff --git a/tests/integration/test_video_process.py b/tests/integration/test_video_process.py new file mode 100644 index 000000000..b245254e7 --- /dev/null +++ b/tests/integration/test_video_process.py @@ -0,0 +1,275 @@ +import functools +import subprocess +import tempfile +from pathlib import Path + +import exifread + +import py.path +import pytest + +from .fixtures import ( + assert_contains_image_descs, + IS_FFMPEG_INSTALLED, + run_command, + run_command_for_descs, + setup_data, +) + + +run_video_process_for_descs = functools.partial( + run_command_for_descs, command="video_process" +) + +run_sample_video = functools.partial(run_command, command="sample_video") + + +def test_sample_video_relpath(): + if not IS_FFMPEG_INSTALLED: + pytest.skip("skip because ffmpeg not installed") + + with tempfile.TemporaryDirectory() as dir: + run_sample_video(["--rerun", "tests/data/gopro_data/hero8.mp4", str(dir)]) + + with tempfile.TemporaryDirectory() as dir: + run_sample_video( + [ + "--rerun", + "--video_start_time", + "2021_10_10_10_10_10_123", + "tests/data", + str(dir), + ] + ) + + +def test_sample_video_without_video_time(setup_data: py.path.local): + if not IS_FFMPEG_INSTALLED: + pytest.skip("skip because ffmpeg not installed") + + video_dir = setup_data.join("videos") + root_sample_dir = video_dir.join("mapillary_sampled_video_frames") + + for input_path in [video_dir, video_dir.join("sample-5s.mp4")]: + with pytest.raises(subprocess.CalledProcessError) as ex: + run_sample_video( + [ + "--video_sample_interval=2", + "--video_sample_distance=-1", + "--rerun", + input_path, + ] + ) + assert 7 == ex.value.returncode, ex.value.stderr + + if root_sample_dir.exists(): + assert len(root_sample_dir.listdir()) == 0 + + run_sample_video( + [ + "--video_sample_interval=2", + "--video_sample_distance=-1", + "--skip_sample_errors", + "--rerun", + input_path, + ] + ) + if root_sample_dir.exists(): + assert len(root_sample_dir.listdir()) == 0 + + +def test_sample_video_specify_video_start_time(setup_data: py.path.local): + if not IS_FFMPEG_INSTALLED: + pytest.skip("skip because ffmpeg not installed") + + video_dir = setup_data.join("videos") + root_sample_dir = video_dir.join("mapillary_sampled_video_frames") + + for input_path in [video_dir, video_dir.join("sample-5s.mp4")]: + run_sample_video( + [ + "--video_sample_interval=2", + "--video_sample_distance=-1", + "--video_start_time=2021_10_10_10_10_10_123", + "--rerun", + input_path, + ] + ) + samples = root_sample_dir.join("sample-5s.mp4").listdir() + samples.sort() + times = [] + for s in samples: + with s.open("rb") as fp: + tags = exifread.process_file(fp) + times.append(tags["EXIF DateTimeOriginal"].values) + assert ( + "2021:10:10 10:10:10", + "2021:10:10 10:10:12", + "2021:10:10 10:10:14", + ) == tuple(times) + + +def test_video_process(setup_data: py.path.local): + if not IS_FFMPEG_INSTALLED: + pytest.skip("skip because ffmpeg not installed") + + video_dir = setup_data.join("videos") + gpx_file = setup_data.join("gpx").join("sf_30km_h.gpx") + gpx_start_time = "2025_03_14_07_00_00_000" + gpx_end_time = "2025_03_14_07_01_33_624" + video_start_time = "2025_03_14_07_00_00_000" + + descs = run_video_process_for_descs( + [ + "--video_sample_interval=2", + "--video_sample_distance=-1", + *["--video_start_time", video_start_time], + "--geotag_source=gpx", + *["--geotag_source_path", str(gpx_file)], + str(video_dir), + str(video_dir.join("my_samples")), + ] + ) + assert 3 == len(descs) + assert 0 == len([d for d in descs if "error" in d]) + + +def test_video_process_sample_with_multiple_distances(setup_data: py.path.local): + if not IS_FFMPEG_INSTALLED: + pytest.skip("skip because ffmpeg not installed") + + video_dir = setup_data.join("gopro_data") + for distance in [0, 2.4, 100]: + descs = run_video_process_for_descs( + [ + "--video_sample_distance", + str(distance), + "--rerun", + str(video_dir), + str(video_dir.join("my_samples")), + ] + ) + if distance == 100: + assert 1 == len(descs) + else: + assert len(descs) > 1 + + +def test_video_process_sample_with_distance(setup_data: py.path.local): + if not IS_FFMPEG_INSTALLED: + pytest.skip("skip because ffmpeg not installed") + + video_dir = setup_data.join("gopro_data") + sample_dir = Path(setup_data, "gopro_data", "my_samples") + + for options in [ + ["--video_sample_distance=6"], + ["--video_sample_distance=6", "--video_sample_interval=-2"], + ]: + descs = run_video_process_for_descs( + [ + *options, + str(video_dir), + str(video_dir.join("my_samples")), + ] + ) + assert_contains_image_descs( + descs, + [ + { + "filename": str( + Path( + sample_dir, + "max-360mode.mp4", + "max-360mode_0_000001.jpg", + ) + ), + "filetype": "image", + "MAPLatitude": 33.1266719, + "MAPLongitude": -117.3273063, + "MAPCaptureTime": "2019_11_18_15_44_47_862", + "MAPAltitude": -22.18, + "MAPCompassHeading": { + "TrueHeading": 313.68, + "MagneticHeading": 313.68, + }, + "MAPSequenceUUID": "0", + "MAPDeviceMake": "GoPro", + "MAPDeviceModel": "GoPro Max", + "MAPOrientation": 1, + }, + { + "filename": str( + Path( + sample_dir, + "max-360mode.mp4", + "max-360mode_0_000002.jpg", + ) + ), + "filetype": "image", + "MAPLatitude": 33.1267206, + "MAPLongitude": -117.3273345, + "MAPCaptureTime": "2019_11_18_15_44_53_159", + "MAPAltitude": -21.91, + "MAPCompassHeading": { + "TrueHeading": 330.82, + "MagneticHeading": 330.82, + }, + "MAPSequenceUUID": "0", + "MAPDeviceMake": "GoPro", + "MAPDeviceModel": "GoPro Max", + "MAPOrientation": 1, + }, + { + "filename": str( + Path( + sample_dir, + "max-360mode.mp4", + "max-360mode_0_000003.jpg", + ) + ), + "filetype": "image", + "MAPLatitude": 33.1267702, + "MAPLongitude": -117.3273612, + "MAPCaptureTime": "2019_11_18_15_44_58_289", + "MAPAltitude": -22.58, + "MAPCompassHeading": { + "TrueHeading": 10.54, + "MagneticHeading": 10.54, + }, + "MAPSequenceUUID": "0", + "MAPDeviceMake": "GoPro", + "MAPDeviceModel": "GoPro Max", + "MAPOrientation": 1, + }, + ], + ) + + +def test_video_process_multiple_videos(setup_data: py.path.local): + if not IS_FFMPEG_INSTALLED: + pytest.skip("skip because ffmpeg not installed") + + sub_folder = setup_data.join("video_sub_folder").mkdir() + video_path = setup_data.join("videos").join("sample-5s.mp4") + video_path.copy(sub_folder) + gpx_file = setup_data.join("gpx").join("sf_30km_h.gpx") + gpx_start_time = "2025_03_14_07_00_00_000" + gpx_end_time = "2025_03_14_07_01_33_624" + descs = run_video_process_for_descs( + [ + "--video_sample_interval=2", + "--video_sample_distance=-1", + *["--video_start_time", gpx_start_time], + "--geotag_source=gpx", + "--geotag_source_path", + str(gpx_file), + str(sub_folder), + str(setup_data.join("my_samples")), + ] + ) + for d in descs: + assert Path(d["filename"]).is_file(), d["filename"] + assert "sample-5s.mp4" in d["filename"] + assert 3 == len(descs) + assert 0 == len([d for d in descs if "error" in d]) From 0c474de611a7580c4f67418544854e20647ac89f Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Sat, 28 Jun 2025 23:15:09 -0700 Subject: [PATCH 14/26] fix test_gopro --- tests/integration/fixtures.py | 11 ++++++++--- tests/integration/test_gopro.py | 29 ++++++++++++++++++++--------- 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/tests/integration/fixtures.py b/tests/integration/fixtures.py index d87a15d81..668db7169 100644 --- a/tests/integration/fixtures.py +++ b/tests/integration/fixtures.py @@ -368,11 +368,15 @@ def assert_same_image_descs(left: Path | list[dict], right: Path | list[dict]): assert_contains_image_descs(right, left) -def run_command(params: list[str], command: str): - subprocess.run([*shlex.split(EXECUTABLE), command, *params], check=True) +def run_command(params: list[str], command: str, **kwargs): + subprocess.run( + [*shlex.split(EXECUTABLE), command, *params], + check=True, + **kwargs, + ) -def run_command_for_descs(params: list[str], command: str): +def run_command_for_descs(params: list[str], command: str, **kwargs): with tempfile.NamedTemporaryFile(suffix=".json") as desc_file: run_command( [ @@ -381,6 +385,7 @@ def run_command_for_descs(params: list[str], command: str): *params, ], command, + **kwargs, ) with open(desc_file.name, "r") as fp: diff --git a/tests/integration/test_gopro.py b/tests/integration/test_gopro.py index a7609717f..705a09b6d 100644 --- a/tests/integration/test_gopro.py +++ b/tests/integration/test_gopro.py @@ -1,21 +1,23 @@ import copy +import functools import os -import subprocess import typing as T -from pathlib import Path import py.path import pytest from .fixtures import ( assert_same_image_descs, - EXECUTABLE, IS_FFMPEG_INSTALLED, + run_command_for_descs, run_exiftool_and_generate_geotag_args, setup_config, setup_upload, ) +run_video_process_for_descs = functools.partial( + run_command_for_descs, command="video_process" +) IMPORT_PATH = "tests/data/gopro_data" TEST_ENVS = { @@ -137,22 +139,31 @@ def test_process_gopro_hero8( if not IS_FFMPEG_INSTALLED: pytest.skip("skip because ffmpeg not installed") video_path = setup_data.join("hero8.mp4") + if use_exiftool: - args = f"{EXECUTABLE} --verbose video_process --video_sample_interval=2 --video_sample_distance=-1 {str(video_path)}" + args = [ + "--video_sample_interval=2", + "--video_sample_distance=-1", + str(video_path), + ] args = run_exiftool_and_generate_geotag_args(setup_data, args) else: - args = f"{EXECUTABLE} --verbose video_process --video_sample_interval=2 --video_sample_distance=-1 --geotag_source=gopro_videos {str(video_path)}" + args = [ + "--video_sample_interval=2", + "--video_sample_distance=-1", + "--geotag_source=gopro_videos", + str(video_path), + ] + env = os.environ.copy() env.update(TEST_ENVS) - x = subprocess.run(args, shell=True, env=env) - assert x.returncode == 0, x.stderr + descs = run_video_process_for_descs(args, env=env) sample_dir = setup_data.join("mapillary_sampled_video_frames") - desc_path = sample_dir.join("mapillary_image_description.json") expected_descs = copy.deepcopy(EXPECTED_DESCS) for expected_desc in expected_descs: expected_desc["filename"] = str(sample_dir.join(expected_desc["filename"])) - assert_same_image_descs(Path(desc_path), expected_descs) + assert_same_image_descs(descs, expected_descs) @pytest.mark.usefixtures("setup_config") From 0a733501a592f8ef71a638a71cdb38b4dd7b6180 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Sun, 29 Jun 2025 01:40:15 -0700 Subject: [PATCH 15/26] add tests --- tests/integration/fixtures.py | 65 +++++---- tests/integration/test_gopro.py | 4 +- tests/integration/test_history.py | 47 ++---- tests/integration/test_process.py | 22 ++- tests/integration/test_process_and_upload.py | 105 +++++++------- tests/integration/test_upload.py | 145 +++++++------------ tests/integration/test_video_process.py | 6 +- 7 files changed, 167 insertions(+), 227 deletions(-) diff --git a/tests/integration/fixtures.py b/tests/integration/fixtures.py index 668db7169..b0ce6cddb 100644 --- a/tests/integration/fixtures.py +++ b/tests/integration/fixtures.py @@ -33,11 +33,13 @@ def setup_config(tmpdir: py.path.local): os.environ["MAPILLARY_CONFIG_PATH"] = str(config_path) os.environ["MAPILLARY_TOOLS_PROMPT_DISABLED"] = "YES" os.environ["MAPILLARY_TOOLS__AUTH_VERIFICATION_DISABLED"] = "YES" - x = subprocess.run( - f"{EXECUTABLE} authenticate --user_name {USERNAME} --jwt test_user_token", - shell=True, + run_command( + [ + *["--user_name", USERNAME], + *["--jwt", "test_user_token"], + ], + command="authenticate", ) - assert x.returncode == 0, x.stderr yield config_path if tmpdir.check(): tmpdir.remove(ignore_errors=True) @@ -83,12 +85,14 @@ def _ffmpeg_installed(): [ffmpeg_path, "-version"], stderr=subprocess.PIPE, stdout=subprocess.PIPE, + check=True, ) # In Windows, ffmpeg is installed but ffprobe is not? subprocess.run( [ffprobe_path, "-version"], stderr=subprocess.PIPE, stdout=subprocess.PIPE, + check=True, ) except FileNotFoundError: return False @@ -104,7 +108,7 @@ def _exiftool_installed(): [EXIFTOOL_EXECUTABLE, "-ver"], stderr=subprocess.PIPE, stdout=subprocess.PIPE, - shell=True, + check=True, ) except FileNotFoundError: return False @@ -211,27 +215,9 @@ def validate_and_extract_camm(video_path: Path) -> list[dict]: upload_md5sum, ) - if not IS_FFMPEG_INSTALLED: - return [] - - with tempfile.TemporaryDirectory() as tempdir: - x = subprocess.run( - f"{EXECUTABLE} --verbose video_process --video_sample_interval=2 --video_sample_distance=-1 --geotag_source=camm {str(video_path)} {tempdir}", - shell=True, - ) - assert x.returncode == 0, x.stderr - - # no exif written so we can't extract the image description - # descs = [] - # for root, _, files in os.walk(tempdir): - # for file in files: - # if file.endswith(".jpg"): - # descs.append(validate_and_extract_image(os.path.join(root, file))) - # return descs - - # instead, we return the mapillary_image_description.json - with open(os.path.join(tempdir, "mapillary_image_description.json")) as fp: - return json.load(fp) + return run_process_for_descs( + ["--file_types=camm", str(video_path)], command="process" + ) def load_descs(descs) -> list: @@ -339,6 +325,11 @@ def assert_compare_image_descs(expected: dict, actual: dict): if "MAPDeviceModel" in expected: assert expected["MAPDeviceModel"] == actual["MAPDeviceModel"] + if "MAPGPSTrack" in expected: + assert expected["MAPGPSTrack"] == actual["MAPGPSTrack"], ( + f"expect {expected['MAPGPSTrack']} but got {actual['MAPGPSTrack']} in {filename}" + ) + def assert_contains_image_descs(haystack: Path | list[dict], needle: Path | list[dict]): """ @@ -369,14 +360,10 @@ def assert_same_image_descs(left: Path | list[dict], right: Path | list[dict]): def run_command(params: list[str], command: str, **kwargs): - subprocess.run( - [*shlex.split(EXECUTABLE), command, *params], - check=True, - **kwargs, - ) + subprocess.run([*shlex.split(EXECUTABLE), command, *params], check=True, **kwargs) -def run_command_for_descs(params: list[str], command: str, **kwargs): +def run_process_for_descs(params: list[str], command: str = "process", **kwargs): with tempfile.NamedTemporaryFile(suffix=".json") as desc_file: run_command( [ @@ -391,3 +378,17 @@ def run_command_for_descs(params: list[str], command: str, **kwargs): with open(desc_file.name, "r") as fp: fp.seek(0) return json.load(fp) + + +def run_process_and_upload_for_descs( + params: list[str], command="process_and_upload", **kwargs +): + return run_process_for_descs( + ["--dry_run", *["--user_name", USERNAME], *params], command=command, **kwargs + ) + + +def run_upload(params: list[str], **kwargs): + return run_command( + ["--dry_run", *["--user_name", USERNAME], *params], command="upload", **kwargs + ) diff --git a/tests/integration/test_gopro.py b/tests/integration/test_gopro.py index 705a09b6d..5b34b6e3e 100644 --- a/tests/integration/test_gopro.py +++ b/tests/integration/test_gopro.py @@ -9,14 +9,14 @@ from .fixtures import ( assert_same_image_descs, IS_FFMPEG_INSTALLED, - run_command_for_descs, run_exiftool_and_generate_geotag_args, + run_process_for_descs, setup_config, setup_upload, ) run_video_process_for_descs = functools.partial( - run_command_for_descs, command="video_process" + run_process_for_descs, command="video_process" ) IMPORT_PATH = "tests/data/gopro_data" diff --git a/tests/integration/test_history.py b/tests/integration/test_history.py index 62cd8d6ff..b6510eca9 100644 --- a/tests/integration/test_history.py +++ b/tests/integration/test_history.py @@ -1,52 +1,37 @@ -import subprocess - import py.path import pytest -from .fixtures import EXECUTABLE, setup_config, setup_data, setup_upload, USERNAME - -UPLOAD_FLAGS = f"--dry_run --user_name={USERNAME}" +from .fixtures import ( + run_process_and_upload_for_descs, + setup_config, + setup_data, + setup_upload, +) @pytest.mark.usefixtures("setup_config") -def test_upload_images( - setup_data: py.path.local, - setup_upload: py.path.local, -): +def test_upload_everything(setup_data: py.path.local, setup_upload: py.path.local): assert len(setup_upload.listdir()) == 0 - x = subprocess.run( - f"{EXECUTABLE} process_and_upload --file_types=image {UPLOAD_FLAGS} {str(setup_data)}", - shell=True, - ) - assert x.returncode == 0, x.stderr + run_process_and_upload_for_descs([str(setup_data)]) + assert 0 < len(setup_upload.listdir()), "should be uploaded for the first time" for upload in setup_upload.listdir(): upload.remove() - x = subprocess.run( - f"{EXECUTABLE} process_and_upload --file_types=image {UPLOAD_FLAGS} {str(setup_data)}", - shell=True, - ) - assert x.returncode == 0, x.stderr + run_process_and_upload_for_descs([str(setup_data)]) + assert len(setup_upload.listdir()) == 0, ( "should NOT upload because it is uploaded already" ) @pytest.mark.usefixtures("setup_config") -def test_upload_gopro( - setup_data: py.path.local, - setup_upload: py.path.local, -): +def test_upload_gopro(setup_data: py.path.local, setup_upload: py.path.local): assert len(setup_upload.listdir()) == 0 video_dir = setup_data.join("gopro_data") - x = subprocess.run( - f"{EXECUTABLE} process_and_upload --skip_process_errors {UPLOAD_FLAGS} {str(video_dir)}", - shell=True, - ) - assert x.returncode == 0, x.stderr + run_process_and_upload_for_descs([str(video_dir)]) assert len(setup_upload.listdir()) == 2, ( f"should be uploaded for the first time but got {setup_upload.listdir()}" ) @@ -55,11 +40,7 @@ def test_upload_gopro( upload.remove() assert len(setup_upload.listdir()) == 1 - x = subprocess.run( - f"{EXECUTABLE} process_and_upload --skip_process_errors {UPLOAD_FLAGS} {str(video_dir)}", - shell=True, - ) - assert x.returncode == 0, x.stderr + run_process_and_upload_for_descs([str(video_dir)]) assert len(setup_upload.listdir()) == 1, ( "should NOT upload because it is uploaded already" ) diff --git a/tests/integration/test_process.py b/tests/integration/test_process.py index 1752bdd57..273551183 100644 --- a/tests/integration/test_process.py +++ b/tests/integration/test_process.py @@ -1,5 +1,4 @@ import datetime -import functools import json import subprocess from pathlib import Path @@ -10,8 +9,8 @@ assert_contains_image_descs, EXECUTABLE, run_command, - run_command_for_descs, run_exiftool_and_generate_geotag_args, + run_process_for_descs, setup_data, validate_and_extract_zip, ) @@ -71,9 +70,6 @@ } -run_process_for_descs = functools.partial(run_command_for_descs, command="process") - - def _local_to_utc(ct: str): return ( datetime.datetime.fromisoformat(ct) @@ -624,3 +620,19 @@ def test_process_video_geotag_source_gopro_gpx_specified(setup_data: py.path.loc assert descs[0]["MAPDeviceMake"] == "GoPro" assert descs[0]["MAPDeviceModel"] == "GoPro Max" assert len(descs[0]["MAPGPSTrack"]) > 0 + + +def test_process_video_geotag_source_exiftool_runtime(setup_data: py.path.local): + video_path = setup_data.join("gopro_data").join("max-360mode.mp4") + + descs = run_process_for_descs( + [ + *["--video_geotag_source", json.dumps({"source": "exiftool"})], + str(video_path), + ] + ) + + assert len(descs) == 1 + assert descs[0]["MAPDeviceMake"] == "GoPro" + assert descs[0]["MAPDeviceModel"] == "GoPro Max" + assert len(descs[0]["MAPGPSTrack"]) > 0 diff --git a/tests/integration/test_process_and_upload.py b/tests/integration/test_process_and_upload.py index c3d93c5ca..0f0c1a599 100644 --- a/tests/integration/test_process_and_upload.py +++ b/tests/integration/test_process_and_upload.py @@ -1,5 +1,4 @@ import datetime -import subprocess from pathlib import Path import py.path @@ -8,19 +7,14 @@ from .fixtures import ( assert_contains_image_descs, assert_same_image_descs, - EXECUTABLE, extract_all_uploaded_descs, IS_FFMPEG_INSTALLED, + run_process_and_upload_for_descs, setup_config, setup_data, setup_upload, - USERNAME, ) -PROCESS_FLAGS = "" -UPLOAD_FLAGS = f"--dry_run --user_name={USERNAME}" - - EXPECTED_DESCS = { "image": { "DSC00001.JPG": { @@ -140,37 +134,37 @@ def test_process_and_upload(setup_data: py.path.local, setup_upload: py.path.loc setup_data.join("images"), setup_data.join("images").join("DSC00001.JPG"), ] - subprocess.run( - f"{EXECUTABLE} --verbose process_and_upload {UPLOAD_FLAGS} {' '.join(map(str, input_paths))} --skip_process_errors", - shell=True, - check=True, - ) - descs = sum(extract_all_uploaded_descs(Path(setup_upload)), []) + descs = run_process_and_upload_for_descs([*[str(path) for path in input_paths]]) assert_contains_image_descs( descs, [*EXPECTED_DESCS["gopro"].values(), *EXPECTED_DESCS["image"].values()], ) + uploaded_descs = sum(extract_all_uploaded_descs(Path(setup_upload)), []) + assert_contains_image_descs( + uploaded_descs, + [*EXPECTED_DESCS["gopro"].values(), *EXPECTED_DESCS["image"].values()], + ) + @pytest.mark.usefixtures("setup_config") def test_process_and_upload_images_only( - setup_data: py.path.local, - setup_upload: py.path.local, + setup_data: py.path.local, setup_upload: py.path.local ): - subprocess.run( - f"""{EXECUTABLE} --verbose process_and_upload \ - {UPLOAD_FLAGS} {PROCESS_FLAGS} \ - --filetypes=image \ - --desc_path=- \ - {setup_data}/images {setup_data}/images {setup_data}/images/DSC00001.JPG -""", - shell=True, - check=True, + descs = run_process_and_upload_for_descs( + [ + "--filetypes=image", + str(setup_data.join("images")), + str(setup_data.join("images")), + str(setup_data.join("images").join("DSC00001.JPG")), + ] ) - descs = sum(extract_all_uploaded_descs(Path(setup_upload)), []) assert_contains_image_descs(descs, [*EXPECTED_DESCS["image"].values()]) + uploaded_descs = sum(extract_all_uploaded_descs(Path(setup_upload)), []) + assert_contains_image_descs(uploaded_descs, [*EXPECTED_DESCS["image"].values()]) + @pytest.mark.usefixtures("setup_config") def test_video_process_and_upload( @@ -183,20 +177,20 @@ def test_video_process_and_upload( gpx_start_time = "2025_03_14_07_00_00_000" gpx_end_time = "2025_03_14_07_01_33_624" gpx_file = setup_data.join("gpx").join("sf_30km_h.gpx") - subprocess.run( - f"""{EXECUTABLE} video_process_and_upload \ - {PROCESS_FLAGS} {UPLOAD_FLAGS} \ - --video_sample_interval=2 \ - --video_sample_distance=-1 \ - --video_start_time {gpx_start_time} \ - --geotag_source gpx \ - --geotag_source_path {gpx_file} \ - --desc_path - \ - {video_dir} {video_dir.join("my_samples")} -""", - shell=True, - check=True, + + run_process_and_upload_for_descs( + [ + "--video_sample_interval=2", + "--video_sample_distance=-1", + *["--video_start_time", gpx_start_time], + *["--geotag_source", "gpx"], + *["--geotag_source_path", str(gpx_file)], + str(video_dir), + str(video_dir.join("my_samples")), + ], + command="video_process_and_upload", ) + expected = { "sample-5s_v_000001.jpg": { "filename": "sample-5s_v_000001.jpg", @@ -241,8 +235,8 @@ def test_video_process_and_upload( "filetype": "image", }, } - descs = sum(extract_all_uploaded_descs(Path(setup_upload)), []) - assert_same_image_descs(descs, list(expected.values())) + uploaded_descs = sum(extract_all_uploaded_descs(Path(setup_upload)), []) + assert_same_image_descs(uploaded_descs, list(expected.values())) @pytest.mark.usefixtures("setup_config") @@ -257,20 +251,19 @@ def test_video_process_and_upload_after_gpx( gpx_end_time = "2025_03_14_07_01_33_624" video_start_time = "2025_03_14_07_01_34_624" gpx_file = setup_data.join("gpx").join("sf_30km_h.gpx") - subprocess.run( - f"""{EXECUTABLE} video_process_and_upload \ - {PROCESS_FLAGS} {UPLOAD_FLAGS} \ - --video_sample_interval=2 \ - --video_sample_distance=-1 \ - --video_start_time {video_start_time} \ - --geotag_source gpx \ - --geotag_source_path {gpx_file} \ - --skip_process_errors \ - --desc_path - \ - {video_dir} {video_dir.join("my_samples")} -""", - shell=True, - check=True, + + run_process_and_upload_for_descs( + [ + "--video_sample_interval=2", + "--video_sample_distance=-1", + *["--video_start_time", video_start_time], + *["--geotag_source", "gpx"], + *["--geotag_source_path", str(gpx_file)], + str(video_dir), + str(video_dir.join("my_samples")), + ], + command="video_process_and_upload", ) - descs = sum(extract_all_uploaded_descs(Path(setup_upload)), []) - assert_same_image_descs(descs, []) + + uploaded_descs = sum(extract_all_uploaded_descs(Path(setup_upload)), []) + assert_same_image_descs(uploaded_descs, []) diff --git a/tests/integration/test_upload.py b/tests/integration/test_upload.py index 31a33d4c1..578fd9579 100644 --- a/tests/integration/test_upload.py +++ b/tests/integration/test_upload.py @@ -7,86 +7,70 @@ from .fixtures import ( assert_contains_image_descs, - EXECUTABLE, extract_all_uploaded_descs, + run_process_and_upload_for_descs, + run_process_for_descs, + run_upload, setup_config, setup_data, setup_upload, - USERNAME, ) -PROCESS_FLAGS = "" -UPLOAD_FLAGS = f"--dry_run --user_name={USERNAME}" +@pytest.mark.usefixtures("setup_config") +def test_upload_everything(setup_data: py.path.local, setup_upload: py.path.local): + descs = run_process_and_upload_for_descs([str(setup_data)]) + + uploaded_descs: list[dict] = sum(extract_all_uploaded_descs(Path(setup_upload)), []) + assert len(uploaded_descs) > 0, "No data were uploaded" + assert len([d for d in uploaded_descs if d["filetype"] == "camm"]) > 0 + assert len([d for d in uploaded_descs if d["filetype"] == "error"]) == 0 + assert len([d for d in uploaded_descs if d["filetype"] == "image"]) > 0 + + # TODO: check video descs + assert_contains_image_descs(descs, uploaded_descs) @pytest.mark.usefixtures("setup_config") def test_upload_image_dir(setup_data: py.path.local, setup_upload: py.path.local): - subprocess.run( - f"{EXECUTABLE} process {PROCESS_FLAGS} --file_types=image {setup_data}", - shell=True, - check=True, - ) - - subprocess.run( - f"{EXECUTABLE} process_and_upload {UPLOAD_FLAGS} --file_types=image {setup_data}", - shell=True, - check=True, - ) + descs = run_process_and_upload_for_descs(["--file_types=image", str(setup_data)]) uploaded_descs: list[dict] = sum(extract_all_uploaded_descs(Path(setup_upload)), []) assert len(uploaded_descs) > 0, "No images were uploaded" - assert_contains_image_descs( - Path(setup_data.join("mapillary_image_description.json")), - uploaded_descs, - ) + assert_contains_image_descs(descs, uploaded_descs) @pytest.mark.usefixtures("setup_config") def test_upload_image_dir_twice(setup_data: py.path.local, setup_upload: py.path.local): - subprocess.run( - f"{EXECUTABLE} process --skip_process_errors {PROCESS_FLAGS} {setup_data}", - shell=True, - check=True, - ) - desc_path = setup_data.join("mapillary_image_description.json") + descs = run_process_for_descs([str(setup_data)]) # first upload - subprocess.run( - f"{EXECUTABLE} process_and_upload {UPLOAD_FLAGS} --file_types=image {setup_data}", - shell=True, - check=True, - ) - first_descs = extract_all_uploaded_descs(Path(setup_upload)) - assert_contains_image_descs( - Path(desc_path), - sum(first_descs, []), + first_descs = run_process_and_upload_for_descs( + ["--file_types=image", str(setup_data)] ) + assert len([d for d in first_descs if d["filetype"] == "image"]) > 0 + first_uploaded_descs = extract_all_uploaded_descs(Path(setup_upload)) + assert_contains_image_descs(descs, sum(first_uploaded_descs, [])) # expect the second upload to not produce new uploads - subprocess.run( - f"{EXECUTABLE} process_and_upload {UPLOAD_FLAGS} --desc_path={desc_path} --file_types=image {setup_data} {setup_data} {setup_data}/images/DSC00001.JPG", - shell=True, - check=True, - ) - second_descs = extract_all_uploaded_descs(Path(setup_upload)) - assert_contains_image_descs( - Path(desc_path), - sum(second_descs, []), + second_descs = run_process_and_upload_for_descs( + [ + "--file_types=image", + str(setup_data), + str(setup_data), + str(setup_data.join("images/DSC00001.JPG")), + ] ) + assert len([d for d in second_descs if d["filetype"] == "image"]) > 0 + second_uploaded_descs = extract_all_uploaded_descs(Path(setup_upload)) + assert_contains_image_descs(descs, sum(second_uploaded_descs, [])) @pytest.mark.usefixtures("setup_config") def test_upload_wrong_descs(setup_data: py.path.local, setup_upload: py.path.local): - x = subprocess.run( - f"{EXECUTABLE} process --skip_process_errors {PROCESS_FLAGS} {setup_data}", - shell=True, - ) - assert x.returncode == 0, x.stderr - desc_path = setup_data.join("mapillary_image_description.json") - with open(desc_path, "r") as fp: - descs = json.load(fp) + descs = run_process_for_descs([str(setup_data)]) + descs.append( { "filename": str(setup_data.join("not_found")), @@ -97,51 +81,20 @@ def test_upload_wrong_descs(setup_data: py.path.local, setup_upload: py.path.loc "MAPCompassHeading": {"TrueHeading": 17.0, "MagneticHeading": 17.0}, }, ) - with open(desc_path, "w") as fp: - fp.write(json.dumps(descs)) - - x = subprocess.run( - f"{EXECUTABLE} upload {UPLOAD_FLAGS} --desc_path={desc_path} {setup_data} {setup_data} {setup_data}/images/DSC00001.JPG", - shell=True, - ) - assert x.returncode == 15, x.stderr - - -@pytest.mark.usefixtures("setup_config") -def test_upload_read_descs_from_stdin( - setup_data: py.path.local, setup_upload: py.path.local -): - subprocess.run( - f"{EXECUTABLE} process {PROCESS_FLAGS} --file_types=image {setup_data}", - shell=True, - check=True, - ) - - descs = [ - { - "filename": "foo.jpg", - "filetype": "image", - "MAPLatitude": 1.0, - "MAPLongitude": 2.0, - "MAPCaptureTime": "2020_01_02_11_12_13_123456", - }, - ] - descs_json = json.dumps(descs) - - process = subprocess.Popen( - f"{EXECUTABLE} process_and_upload {UPLOAD_FLAGS} --file_types=image {setup_data}", - stdin=subprocess.PIPE, - text=True, - shell=True, - ) - stdout, stderr = process.communicate(input=descs_json) - assert process.returncode == 0, stderr + desc_path = setup_data.join("mapillary_image_description.json") - uploaded_descs: list[dict] = sum(extract_all_uploaded_descs(Path(setup_upload)), []) - assert len(uploaded_descs) > 0, "No images were uploaded" + with open(desc_path, "w") as fp: + fp.write(json.dumps(descs)) - assert_contains_image_descs( - Path(setup_data.join("mapillary_image_description.json")), - uploaded_descs, - ) + with pytest.raises(subprocess.CalledProcessError) as ex: + run_upload( + [ + *["--desc_path", str(desc_path)], + str(setup_data), + str(setup_data), + str(setup_data.join("images/DSC00001.JPG")), + ] + ) + + assert ex.value.returncode == 15, ex.value.stderr \ No newline at end of file diff --git a/tests/integration/test_video_process.py b/tests/integration/test_video_process.py index b245254e7..b9f0928c5 100644 --- a/tests/integration/test_video_process.py +++ b/tests/integration/test_video_process.py @@ -12,13 +12,13 @@ assert_contains_image_descs, IS_FFMPEG_INSTALLED, run_command, - run_command_for_descs, + run_process_for_descs, setup_data, ) run_video_process_for_descs = functools.partial( - run_command_for_descs, command="video_process" + run_process_for_descs, command="video_process" ) run_sample_video = functools.partial(run_command, command="sample_video") @@ -60,7 +60,7 @@ def test_sample_video_without_video_time(setup_data: py.path.local): input_path, ] ) - assert 7 == ex.value.returncode, ex.value.stderr + assert 7 == ex.value.returncode, ex.value.stderr if root_sample_dir.exists(): assert len(root_sample_dir.listdir()) == 0 From 77c29a263da1990ba7ef8c229ffb1044aebe9b44 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Sun, 29 Jun 2025 10:38:35 -0700 Subject: [PATCH 16/26] fix upload --- .../geotag/geotag_videos_from_gpx.py | 2 +- .../geotag/video_extractors/exiftool.py | 2 +- .../geotag/video_extractors/gpx.py | 4 +- mapillary_tools/uploader.py | 98 +++++++++---------- 4 files changed, 52 insertions(+), 54 deletions(-) diff --git a/mapillary_tools/geotag/geotag_videos_from_gpx.py b/mapillary_tools/geotag/geotag_videos_from_gpx.py index 89a56fd22..7a728b617 100644 --- a/mapillary_tools/geotag/geotag_videos_from_gpx.py +++ b/mapillary_tools/geotag/geotag_videos_from_gpx.py @@ -10,7 +10,7 @@ else: from typing_extensions import override -from .. import types, exceptions +from .. import exceptions, types from . import options from .base import GeotagVideosFromGeneric from .video_extractors.gpx import GPXVideoExtractor diff --git a/mapillary_tools/geotag/video_extractors/exiftool.py b/mapillary_tools/geotag/video_extractors/exiftool.py index bb51863a5..e2e0f6956 100644 --- a/mapillary_tools/geotag/video_extractors/exiftool.py +++ b/mapillary_tools/geotag/video_extractors/exiftool.py @@ -21,7 +21,7 @@ def __init__(self, video_path: Path, element: ET.Element): self.element = element @override - def extract(self) -> types.VideoMetadataOrError: + def extract(self) -> types.VideoMetadata: exif = exiftool_read_video.ExifToolReadVideo(ET.ElementTree(self.element)) make = exif.extract_make() diff --git a/mapillary_tools/geotag/video_extractors/gpx.py b/mapillary_tools/geotag/video_extractors/gpx.py index 163f980ab..6d08f5b5b 100644 --- a/mapillary_tools/geotag/video_extractors/gpx.py +++ b/mapillary_tools/geotag/video_extractors/gpx.py @@ -1,9 +1,9 @@ from __future__ import annotations import dataclasses +import enum import logging import sys -import enum import typing as T from pathlib import Path @@ -12,7 +12,7 @@ else: from typing_extensions import override -from ... import geo, telemetry, types, exceptions +from ... import exceptions, geo, telemetry, types from ..utils import parse_gpx from .base import BaseVideoExtractor from .native import NativeVideoExtractor diff --git a/mapillary_tools/uploader.py b/mapillary_tools/uploader.py index 55b468c85..388fcd3fa 100644 --- a/mapillary_tools/uploader.py +++ b/mapillary_tools/uploader.py @@ -378,6 +378,9 @@ def _upload_sequence( if progress is None: progress = {} + # FIXME: This is a hack to disable the event emitter inside the uploader + uploader_without_emitter = uploader.copy_uploader_without_emitter() + lock = threading.Lock() def _upload_image(image_metadata: types.ImageMetadata) -> str: @@ -387,7 +390,7 @@ def _upload_image(image_metadata: types.ImageMetadata) -> str: } bytes = cls._dump_image_bytes(image_metadata) - file_handle = uploader.upload_stream( + file_handle = uploader_without_emitter.upload_stream( io.BytesIO(bytes), progress=mutable_progress ) @@ -404,9 +407,6 @@ def _upload_image(image_metadata: types.ImageMetadata) -> str: # TODO: assert sequence is sorted - # FIXME: This is a hack to disable the event emitter inside the uploader - uploader.emittion_disabled = True - uploader.emitter.emit("upload_start", progress) with concurrent.futures.ThreadPoolExecutor( @@ -423,15 +423,12 @@ def _upload_image(image_metadata: types.ImageMetadata) -> str: with io.BytesIO() as manifest_fp: manifest_fp.write(json.dumps(manifest).encode("utf-8")) manifest_fp.seek(0, io.SEEK_SET) - manifest_file_handle = uploader.upload_stream( + manifest_file_handle = uploader_without_emitter.upload_stream( manifest_fp, session_key=f"{uuid.uuid4().hex}.json" ) uploader.emitter.emit("upload_end", progress) - # FIXME: This is a hack to disable the event emitter inside the uploader - uploader.emittion_disabled = False - cluster_id = uploader.finish_upload( manifest_file_handle, api_v4.ClusterFileType.MLY_BUNDLE_MANIFEST, @@ -485,7 +482,6 @@ def __init__( dry_run=False, ): self.user_items = user_items - self.emittion_disabled = False if emitter is None: # An empty event emitter that does nothing self.emitter = EventEmitter() @@ -522,7 +518,7 @@ def upload_stream( progress["retries"] = 0 progress["begin_offset"] = None - self._maybe_emit("upload_start", progress) + self.emitter.emit("upload_start", progress) while True: try: @@ -536,10 +532,48 @@ def upload_stream( progress["retries"] += 1 - self._maybe_emit("upload_end", progress) + self.emitter.emit("upload_end", progress) return file_handle + def finish_upload( + self, + file_handle: str, + cluster_filetype: api_v4.ClusterFileType, + progress: dict[str, T.Any] | None = None, + ) -> str: + """Finish upload with safe retries guraranteed""" + if progress is None: + progress = {} + + if self.dry_run: + cluster_id = "0" + else: + resp = api_v4.finish_upload( + self.user_items["user_upload_token"], + file_handle, + cluster_filetype, + organization_id=self.user_items.get("MAPOrganizationKey"), + ) + + data = resp.json() + cluster_id = data.get("cluster_id") + + # TODO: validate cluster_id + + progress["cluster_id"] = cluster_id + self.emitter.emit("upload_finished", progress) + + return cluster_id + + def copy_uploader_without_emitter(self) -> Uploader: + return Uploader( + self.user_items, + emitter=None, + chunk_size=self.chunk_size, + dry_run=self.dry_run, + ) + def _create_upload_service(self, session_key: str) -> upload_api_v4.UploadService: upload_service: upload_api_v4.UploadService @@ -570,7 +604,7 @@ def _handle_upload_exception( chunk_size = progress["chunk_size"] if retries <= constants.MAX_UPLOAD_RETRIES and _is_retriable_exception(ex): - self._maybe_emit("upload_interrupted", progress) + self.emitter.emit("upload_interrupted", progress) LOG.warning( # use %s instead of %d because offset could be None "Error uploading chunk_size %d at begin_offset %s: %s: %s", @@ -611,7 +645,7 @@ def _chunk_with_progress_emitted( # Whenever a chunk is uploaded, reset retries progress["retries"] = 0 - self._maybe_emit("upload_progress", progress) + self.emitter.emit("upload_progress", progress) def _upload_stream_retryable( self, @@ -626,7 +660,7 @@ def _upload_stream_retryable( progress["begin_offset"] = begin_offset progress["offset"] = begin_offset - self._maybe_emit("upload_fetch_offset", progress) + self.emitter.emit("upload_fetch_offset", progress) fp.seek(begin_offset, io.SEEK_SET) @@ -634,42 +668,6 @@ def _upload_stream_retryable( return upload_service.upload_shifted_chunks(shifted_chunks, begin_offset) - def finish_upload( - self, - file_handle: str, - cluster_filetype: api_v4.ClusterFileType, - progress: dict[str, T.Any] | None = None, - ) -> str: - """Finish upload with safe retries guraranteed""" - if progress is None: - progress = {} - - if self.dry_run: - cluster_id = "0" - else: - resp = api_v4.finish_upload( - self.user_items["user_upload_token"], - file_handle, - cluster_filetype, - organization_id=self.user_items.get("MAPOrganizationKey"), - ) - - data = resp.json() - cluster_id = data.get("cluster_id") - - # TODO: validate cluster_id - - progress["cluster_id"] = cluster_id - self._maybe_emit("upload_finished", progress) - - return cluster_id - - def _maybe_emit( - self, event: EventName, progress: dict[str, T.Any] | UploaderProgress - ): - if not self.emittion_disabled: - return self.emitter.emit(event, progress) - def _validate_metadatas(metadatas: T.Sequence[types.ImageMetadata]): for metadata in metadatas: From 703b181db75d48bff460adb462e02e3760979631 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Sun, 29 Jun 2025 16:50:39 -0700 Subject: [PATCH 17/26] tests --- mapillary_tools/geotag/factory.py | 15 +- .../geotag/geotag_images_from_exiftool.py | 40 ++-- .../geotag/geotag_videos_from_exiftool.py | 47 +++-- .../geotag/geotag_videos_from_gpx.py | 10 +- tests/integration/fixtures.py | 61 ++++-- tests/integration/test_gopro.py | 6 +- tests/integration/test_process.py | 175 +++++++++++++++++- tests/integration/test_process_and_upload.py | 8 +- tests/integration/test_upload.py | 2 +- tests/integration/test_video_process.py | 23 +-- tests/unit/test_ffmpeg.py | 34 ++-- 11 files changed, 323 insertions(+), 98 deletions(-) diff --git a/mapillary_tools/geotag/factory.py b/mapillary_tools/geotag/factory.py index 297efb25d..a5652c9e8 100644 --- a/mapillary_tools/geotag/factory.py +++ b/mapillary_tools/geotag/factory.py @@ -113,6 +113,7 @@ def _is_reprocessable(metadata: types.MetadataOrError) -> bool: ( exceptions.MapillaryGeoTaggingError, exceptions.MapillaryVideoGPSNotFoundError, + exceptions.MapillaryExiftoolNotFoundError, ), ): return True @@ -175,8 +176,12 @@ def _build_image_geotag(option: SourceOption) -> base.GeotagImagesFromGeneric | elif option.source is SourceType.EXIFTOOL_XML: # This is to ensure 'video_process --geotag={"source": "exiftool_xml", "source_path": "/tmp/xml_path"}' # to work + if option.source_path is None: + raise exceptions.MapillaryBadParameterError( + "source_path must be provided for EXIFTOOL_XML source" + ) return geotag_images_from_exiftool.GeotagImagesFromExifToolWithSamples( - xml_path=_ensure_source_path(option), + source_path=option.source_path, num_processes=option.num_processes, ) @@ -219,13 +224,17 @@ def _build_video_geotag(option: SourceOption) -> base.GeotagVideosFromGeneric | ) elif option.source is SourceType.EXIFTOOL_XML: + if option.source_path is None: + raise exceptions.MapillaryBadParameterError( + "source_path must be provided for EXIFTOOL_XML source" + ) return geotag_videos_from_exiftool.GeotagVideosFromExifToolXML( - xml_path=_ensure_source_path(option), + source_path=option.source_path, ) elif option.source is SourceType.GPX: return geotag_videos_from_gpx.GeotagVideosFromGPX( - option=option.source_path, num_processes=option.num_processes + source_path=option.source_path, num_processes=option.num_processes ) elif option.source is SourceType.NMEA: diff --git a/mapillary_tools/geotag/geotag_images_from_exiftool.py b/mapillary_tools/geotag/geotag_images_from_exiftool.py index 8d56a1915..6c74e0a99 100644 --- a/mapillary_tools/geotag/geotag_images_from_exiftool.py +++ b/mapillary_tools/geotag/geotag_images_from_exiftool.py @@ -13,29 +13,25 @@ from .. import constants, exceptions, exiftool_read, types, utils from ..exiftool_runner import ExiftoolRunner +from . import options from .base import GeotagImagesFromGeneric from .geotag_images_from_video import GeotagImagesFromVideo from .geotag_videos_from_exiftool import GeotagVideosFromExifToolXML from .image_extractors.exiftool import ImageExifToolExtractor -from .utils import index_rdf_description_by_path LOG = logging.getLogger(__name__) class GeotagImagesFromExifToolXML(GeotagImagesFromGeneric): def __init__( - self, - xml_path: Path, - num_processes: int | None = None, + self, source_path: options.SourcePathOption, num_processes: int | None = None ): - self.xml_path = xml_path + self.source_path = source_path super().__init__(num_processes=num_processes) @classmethod def build_image_extractors( - cls, - rdf_by_path: dict[str, ET.Element], - image_paths: T.Iterable[Path], + cls, rdf_by_path: dict[str, ET.Element], image_paths: T.Iterable[Path] ) -> list[ImageExifToolExtractor | types.ErrorMetadata]: results: list[ImageExifToolExtractor | types.ErrorMetadata] = [] @@ -59,7 +55,9 @@ def build_image_extractors( def _generate_image_extractors( self, image_paths: T.Sequence[Path] ) -> T.Sequence[ImageExifToolExtractor | types.ErrorMetadata]: - rdf_by_path = index_rdf_description_by_path([self.xml_path]) + rdf_by_path = GeotagVideosFromExifToolXML.find_rdf_by_path( + self.source_path, image_paths + ) return self.build_image_extractors(rdf_by_path, image_paths) @@ -81,7 +79,13 @@ def _generate_image_extractors( try: xml = runner.extract_xml(image_paths) except FileNotFoundError as ex: - raise exceptions.MapillaryExiftoolNotFoundError(ex) from ex + exiftool_ex = exceptions.MapillaryExiftoolNotFoundError(ex) + return [ + types.describe_error_metadata( + exiftool_ex, image_path, filetype=types.FileType.IMAGE + ) + for image_path in image_paths + ] try: xml_element = ET.fromstring(xml) @@ -105,29 +109,30 @@ def _generate_image_extractors( class GeotagImagesFromExifToolWithSamples(GeotagImagesFromGeneric): def __init__( self, - xml_path: Path, + source_path: options.SourcePathOption, offset_time: float = 0.0, num_processes: int | None = None, ): super().__init__(num_processes=num_processes) - self.xml_path = xml_path + self.source_path = source_path self.offset_time = offset_time def geotag_samples( self, image_paths: T.Sequence[Path] ) -> list[types.ImageMetadataOrError]: # Find all video paths in self.xml_path - rdf_by_path = index_rdf_description_by_path([self.xml_path]) + rdf_by_path = GeotagVideosFromExifToolXML.find_rdf_by_path( + self.source_path, image_paths + ) video_paths = utils.find_videos( - [Path(pathstr) for pathstr in rdf_by_path.keys()], + [Path(canonical_path) for canonical_path in rdf_by_path.keys()], skip_subfolders=True, ) # Find all video paths that have sample images samples_by_video = utils.find_all_image_samples(image_paths, video_paths) video_metadata_or_errors = GeotagVideosFromExifToolXML( - self.xml_path, - num_processes=self.num_processes, + self.source_path, num_processes=self.num_processes ).to_description(list(samples_by_video.keys())) sample_paths = sum(samples_by_video.values(), []) sample_metadata_or_errors = GeotagImagesFromVideo( @@ -149,8 +154,7 @@ def to_description( non_sample_paths = [path for path in image_paths if path not in sample_paths] non_sample_metadata_or_errors = GeotagImagesFromExifToolXML( - self.xml_path, - num_processes=self.num_processes, + self.source_path, num_processes=self.num_processes ).to_description(non_sample_paths) return sample_metadata_or_errors + non_sample_metadata_or_errors diff --git a/mapillary_tools/geotag/geotag_videos_from_exiftool.py b/mapillary_tools/geotag/geotag_videos_from_exiftool.py index 7334c059c..04bbef4c5 100644 --- a/mapillary_tools/geotag/geotag_videos_from_exiftool.py +++ b/mapillary_tools/geotag/geotag_videos_from_exiftool.py @@ -13,6 +13,7 @@ from .. import constants, exceptions, exiftool_read, types from ..exiftool_runner import ExiftoolRunner +from . import options from .base import GeotagVideosFromGeneric from .utils import index_rdf_description_by_path from .video_extractors.exiftool import VideoExifToolExtractor @@ -22,18 +23,14 @@ class GeotagVideosFromExifToolXML(GeotagVideosFromGeneric): def __init__( - self, - xml_path: Path, - num_processes: int | None = None, + self, source_path: options.SourcePathOption, num_processes: int | None = None ): super().__init__(num_processes=num_processes) - self.xml_path = xml_path + self.source_path = source_path @classmethod - def build_image_extractors( - cls, - rdf_by_path: dict[str, ET.Element], - video_paths: T.Iterable[Path], + def build_video_extractors_from_etree( + cls, rdf_by_path: dict[str, ET.Element], video_paths: T.Iterable[Path] ) -> list[VideoExifToolExtractor | types.ErrorMetadata]: results: list[VideoExifToolExtractor | types.ErrorMetadata] = [] @@ -57,8 +54,28 @@ def build_image_extractors( def _generate_video_extractors( self, video_paths: T.Sequence[Path] ) -> T.Sequence[VideoExifToolExtractor | types.ErrorMetadata]: - rdf_by_path = index_rdf_description_by_path([self.xml_path]) - return self.build_image_extractors(rdf_by_path, video_paths) + rdf_by_path = self.find_rdf_by_path(self.source_path, video_paths) + return self.build_video_extractors_from_etree(rdf_by_path, video_paths) + + @classmethod + def find_rdf_by_path( + cls, option: options.SourcePathOption, paths: T.Iterable[Path] + ) -> dict[str, ET.Element]: + if option.source_path is not None: + return index_rdf_description_by_path([option.source_path]) + + elif option.pattern is not None: + rdf_by_path = {} + for path in paths: + source_path = option.resolve(path) + r = index_rdf_description_by_path([source_path]) + rdfs = list(r.values()) + if rdfs: + rdf_by_path[exiftool_read.canonical_path(path)] = rdfs[0] + return rdf_by_path + + else: + assert False, "Either source_path or pattern must be provided" class GeotagVideosFromExifToolRunner(GeotagVideosFromGeneric): @@ -79,7 +96,13 @@ def _generate_video_extractors( try: xml = runner.extract_xml(video_paths) except FileNotFoundError as ex: - raise exceptions.MapillaryExiftoolNotFoundError(ex) from ex + exiftool_ex = exceptions.MapillaryExiftoolNotFoundError(ex) + return [ + types.describe_error_metadata( + exiftool_ex, video_path, filetype=types.FileType.VIDEO + ) + for video_path in video_paths + ] try: xml_element = ET.fromstring(xml) @@ -95,6 +118,6 @@ def _generate_video_extractors( xml_element ) - return GeotagVideosFromExifToolXML.build_image_extractors( + return GeotagVideosFromExifToolXML.build_video_extractors_from_etree( rdf_by_path, video_paths ) diff --git a/mapillary_tools/geotag/geotag_videos_from_gpx.py b/mapillary_tools/geotag/geotag_videos_from_gpx.py index 7a728b617..ae9fb1851 100644 --- a/mapillary_tools/geotag/geotag_videos_from_gpx.py +++ b/mapillary_tools/geotag/geotag_videos_from_gpx.py @@ -22,13 +22,13 @@ class GeotagVideosFromGPX(GeotagVideosFromGeneric): def __init__( self, - option: options.SourcePathOption | None = None, + source_path: options.SourcePathOption | None = None, num_processes: int | None = None, ): super().__init__(num_processes=num_processes) - if option is None: - option = options.SourcePathOption(pattern="%g.gpx") - self.option = option + if source_path is None: + source_path = options.SourcePathOption(pattern="%g.gpx") + self.source_path = source_path @override def _generate_video_extractors( @@ -36,7 +36,7 @@ def _generate_video_extractors( ) -> T.Sequence[GPXVideoExtractor | types.ErrorMetadata]: results: list[GPXVideoExtractor | types.ErrorMetadata] = [] for video_path in video_paths: - source_path = self.option.resolve(video_path) + source_path = self.source_path.resolve(video_path) if source_path.is_file(): results.append(GPXVideoExtractor(video_path, source_path)) else: diff --git a/tests/integration/fixtures.py b/tests/integration/fixtures.py index b0ce6cddb..1666ca85c 100644 --- a/tests/integration/fixtures.py +++ b/tests/integration/fixtures.py @@ -118,7 +118,9 @@ def _exiftool_installed(): IS_EXIFTOOL_INSTALLED = _exiftool_installed() -def run_exiftool(setup_data: py.path.local) -> py.path.local: +def run_exiftool_dir(setup_data: py.path.local) -> py.path.local: + pytest_skip_if_not_exiftool_installed() + exiftool_outuput_dir = setup_data.join("exiftool_outuput_dir") # The "-w %c" option in exiftool will produce duplicated XML files therefore clean up the folder first shutil.rmtree(exiftool_outuput_dir, ignore_errors=True) @@ -145,9 +147,21 @@ def run_exiftool(setup_data: py.path.local) -> py.path.local: # The solution is to use %c which suffixes the original filenames with an increasing number # -w C%c.txt # C.txt, C1.txt, C2.txt ... # -w C%.c.txt # C0.txt, C1.txt, C2.txt ... - subprocess.check_call( - f"{EXIFTOOL_EXECUTABLE} -r -ee -n -X -api LargeFileSupport=1 -w! {exiftool_outuput_dir}/%f%c.xml {setup_data}", - shell=True, + # TODO: Maybe replace with exiftool_runner + subprocess.run( + [ + EXIFTOOL_EXECUTABLE, + "-fast", # Fast processing + "-q", # Quiet mode + "-r", # Recursive + "-n", # Disable print conversion + "-X", # XML output + "-ee", + *["-api", "LargeFileSupport=1"], + *["-w!", str(exiftool_outuput_dir.join("%f%c.xml"))], + str(setup_data), + ], + check=True, ) return exiftool_outuput_dir @@ -155,10 +169,9 @@ def run_exiftool(setup_data: py.path.local) -> py.path.local: def run_exiftool_and_generate_geotag_args( test_data_dir: py.path.local, run_args: list[str] ) -> list[str]: - if not IS_EXIFTOOL_INSTALLED: - pytest.skip("exiftool not installed") + pytest_skip_if_not_exiftool_installed() - exiftool_outuput_dir = run_exiftool(test_data_dir) + exiftool_outuput_dir = run_exiftool_dir(test_data_dir) return [ *run_args, "--geotag_source=exiftool_xml", @@ -167,10 +180,6 @@ def run_exiftool_and_generate_geotag_args( ] -with open("schema/image_description_schema.json") as fp: - IMAGE_DESCRIPTION_SCHEMA = json.load(fp) - - def validate_and_extract_image(image_path: Path): with image_path.open("rb") as fp: tags = exifread.process_file(fp) @@ -359,6 +368,22 @@ def assert_same_image_descs(left: Path | list[dict], right: Path | list[dict]): assert_contains_image_descs(right, left) +def assert_descs_exact_equal(left: list[dict], right: list[dict]): + assert len(left) == len(right) + + # TODO: make sure groups are the same too + for d in left: + d.pop("MAPSequenceUUID", None) + + for d in right: + d.pop("MAPSequenceUUID", None) + + left.sort(key=lambda d: d["filename"]) + right.sort(key=lambda d: d["filename"]) + + assert left == right + + def run_command(params: list[str], command: str, **kwargs): subprocess.run([*shlex.split(EXECUTABLE), command, *params], check=True, **kwargs) @@ -392,3 +417,17 @@ def run_upload(params: list[str], **kwargs): return run_command( ["--dry_run", *["--user_name", USERNAME], *params], command="upload", **kwargs ) + + +def pytest_skip_if_not_ffmpeg_installed(): + if not IS_FFMPEG_INSTALLED: + pytest.skip("ffmpeg is not installed, skipping the test") + + +def pytest_skip_if_not_exiftool_installed(): + if not IS_EXIFTOOL_INSTALLED: + pytest.skip("exiftool is not installed, skipping the test") + + +with open("schema/image_description_schema.json") as fp: + IMAGE_DESCRIPTION_SCHEMA = json.load(fp) diff --git a/tests/integration/test_gopro.py b/tests/integration/test_gopro.py index 5b34b6e3e..9ea014a4e 100644 --- a/tests/integration/test_gopro.py +++ b/tests/integration/test_gopro.py @@ -8,7 +8,7 @@ from .fixtures import ( assert_same_image_descs, - IS_FFMPEG_INSTALLED, + pytest_skip_if_not_ffmpeg_installed, run_exiftool_and_generate_geotag_args, run_process_for_descs, setup_config, @@ -136,8 +136,8 @@ def test_process_gopro_hero8( setup_data: py.path.local, use_exiftool: bool = False, ): - if not IS_FFMPEG_INSTALLED: - pytest.skip("skip because ffmpeg not installed") + pytest_skip_if_not_ffmpeg_installed() + video_path = setup_data.join("hero8.mp4") if use_exiftool: diff --git a/tests/integration/test_process.py b/tests/integration/test_process.py index 273551183..a6567becc 100644 --- a/tests/integration/test_process.py +++ b/tests/integration/test_process.py @@ -1,5 +1,6 @@ import datetime import json +import os import subprocess from pathlib import Path @@ -7,9 +8,12 @@ from .fixtures import ( assert_contains_image_descs, + assert_descs_exact_equal, EXECUTABLE, + pytest_skip_if_not_exiftool_installed, run_command, run_exiftool_and_generate_geotag_args, + run_exiftool_dir, run_process_for_descs, setup_data, validate_and_extract_zip, @@ -571,7 +575,7 @@ def test_process_unsupported_filetypes(setup_data: py.path.local): assert 0 == len(descs) -def test_process_video_geotag_source_gpx_specified(setup_data: py.path.local): +def test_process_video_geotag_source_with_gpx_specified(setup_data: py.path.local): video_path = setup_data.join("videos").join("sample-5s.mp4") gpx_file = setup_data.join("gpx").join("sf_30km_h.gpx") @@ -602,7 +606,9 @@ def test_process_video_geotag_source_gpx_not_found(setup_data: py.path.local): assert descs[0]["error"]["type"] == "MapillaryVideoGPSNotFoundError" -def test_process_video_geotag_source_gopro_gpx_specified(setup_data: py.path.local): +def test_process_video_geotag_source_with_gopro_gpx_specified( + setup_data: py.path.local, +): video_path = setup_data.join("gopro_data").join("max-360mode.mp4") gpx_file = setup_data.join("gpx").join("sf_30km_h.gpx") @@ -622,12 +628,28 @@ def test_process_video_geotag_source_gopro_gpx_specified(setup_data: py.path.loc assert len(descs[0]["MAPGPSTrack"]) > 0 -def test_process_video_geotag_source_exiftool_runtime(setup_data: py.path.local): +def test_process_geotag_with_gpx_pattern_not_found(setup_data: py.path.local): video_path = setup_data.join("gopro_data").join("max-360mode.mp4") descs = run_process_for_descs( [ - *["--video_geotag_source", json.dumps({"source": "exiftool"})], + *["--video_geotag_source", "gpx"], + str(video_path), + ] + ) + + assert len(descs) == 1 + assert descs[0]["error"]["type"] == "MapillaryVideoGPSNotFoundError" + + +def test_process_geotag_with_gpx_pattern(setup_data: py.path.local): + video_path = setup_data.join("gopro_data").join("max-360mode.mp4") + gpx_file = setup_data.join("gpx").join("sf_30km_h.gpx") + gpx_file.copy(setup_data.join("gopro_data").join("max-360mode.gpx")) + + descs = run_process_for_descs( + [ + *["--video_geotag_source", "gpx"], str(video_path), ] ) @@ -636,3 +658,148 @@ def test_process_video_geotag_source_exiftool_runtime(setup_data: py.path.local) assert descs[0]["MAPDeviceMake"] == "GoPro" assert descs[0]["MAPDeviceModel"] == "GoPro Max" assert len(descs[0]["MAPGPSTrack"]) > 0 + + +def test_process_video_geotag_source_with_exiftool_runtime(setup_data: py.path.local): + pytest_skip_if_not_exiftool_installed() + + video_path = setup_data.join("gopro_data").join("max-360mode.mp4") + + exiftool_descs = run_process_for_descs( + [ + *["--video_geotag_source", json.dumps({"source": "exiftool"})], + str(video_path), + ] + ) + + assert len(exiftool_descs) == 1 + assert exiftool_descs[0]["MAPDeviceMake"] == "GoPro" + assert exiftool_descs[0]["MAPDeviceModel"] == "GoPro Max" + assert len(exiftool_descs[0]["MAPGPSTrack"]) > 0 + + native_descs = run_process_for_descs( + [ + *["--video_geotag_source", json.dumps({"source": "native"})], + str(video_path), + ] + ) + + assert_descs_exact_equal(exiftool_descs, native_descs) + + +def test_process_geotag_everything_with_exiftool_runtime(setup_data: py.path.local): + pytest_skip_if_not_exiftool_installed() + + exiftool_descs = run_process_for_descs( + [*["--geotag_source", "exiftool_runtime"], str(setup_data)] + ) + + native_descs = run_process_for_descs( + [*["--geotag_source", "native"], str(setup_data)] + ) + + assert_descs_exact_equal(exiftool_descs, native_descs) + + +def test_process_geotag_everything_with_exiftool_not_found(setup_data: py.path.local): + pytest_skip_if_not_exiftool_installed() + + env = os.environ.copy() + env.update( + { + "MAPILLARY_TOOLS_EXIFTOOL_PATH": "exiftool_not_found", + } + ) + + exiftool_descs = run_process_for_descs( + [*["--geotag_source", "exiftool_runtime"], str(setup_data)], env=env + ) + + assert len(exiftool_descs) > 0 + for d in exiftool_descs: + assert "error" in d + assert d["error"]["type"] == "MapillaryExiftoolNotFoundError" + + +def test_process_geotag_everything_with_exiftool_not_found_overriden( + setup_data: py.path.local, +): + pytest_skip_if_not_exiftool_installed() + + env = os.environ.copy() + env.update( + { + "MAPILLARY_TOOLS_EXIFTOOL_PATH": "exiftool_not_found", + } + ) + + exiftool_descs = run_process_for_descs( + [ + *["--geotag_source", "exiftool_runtime"], + *["--geotag_source", "native"], + str(setup_data), + ], + env=env, + ) + + native_descs = run_process_for_descs( + [ + *["--geotag_source", "exiftool_runtime"], + *["--geotag_source", "native"], + str(setup_data), + ], + env=env, + ) + + assert_descs_exact_equal(exiftool_descs, native_descs) + + +def test_process_geotag_with_exiftool_xml(setup_data: py.path.local): + pytest_skip_if_not_exiftool_installed() + + exiftool_output_dir = run_exiftool_dir(setup_data) + + exiftool_descs = run_process_for_descs( + [ + *[ + "--geotag_source", + json.dumps( + {"source": "exiftool_xml", "source_path": str(exiftool_output_dir)} + ), + ], + str(setup_data), + ] + ) + + native_descs = run_process_for_descs( + [*["--geotag_source", "native"], str(setup_data)] + ) + + assert_descs_exact_equal(exiftool_descs, native_descs) + + +def test_process_geotag_with_exiftool_xml_pattern(setup_data: py.path.local): + pytest_skip_if_not_exiftool_installed() + + exiftool_output_dir = run_exiftool_dir(setup_data) + + exiftool_descs = run_process_for_descs( + [ + *[ + "--geotag_source", + json.dumps( + { + "source": "exiftool_xml", + "pattern": str(exiftool_output_dir.join("%g.xml")), + } + ), + ], + str(setup_data), + ] + ) + + native_descs = run_process_for_descs( + [*["--geotag_source", "native"], str(setup_data)] + ) + + assert_descs_exact_equal(exiftool_descs, native_descs) diff --git a/tests/integration/test_process_and_upload.py b/tests/integration/test_process_and_upload.py index 0f0c1a599..c21cb2c91 100644 --- a/tests/integration/test_process_and_upload.py +++ b/tests/integration/test_process_and_upload.py @@ -8,7 +8,7 @@ assert_contains_image_descs, assert_same_image_descs, extract_all_uploaded_descs, - IS_FFMPEG_INSTALLED, + pytest_skip_if_not_ffmpeg_installed, run_process_and_upload_for_descs, setup_config, setup_data, @@ -170,8 +170,7 @@ def test_process_and_upload_images_only( def test_video_process_and_upload( setup_upload: py.path.local, setup_data: py.path.local ): - if not IS_FFMPEG_INSTALLED: - pytest.skip("skip because ffmpeg not installed") + pytest_skip_if_not_ffmpeg_installed() video_dir = setup_data.join("videos") gpx_start_time = "2025_03_14_07_00_00_000" @@ -243,8 +242,7 @@ def test_video_process_and_upload( def test_video_process_and_upload_after_gpx( setup_upload: py.path.local, setup_data: py.path.local ): - if not IS_FFMPEG_INSTALLED: - pytest.skip("skip because ffmpeg not installed") + pytest_skip_if_not_ffmpeg_installed() video_dir = setup_data.join("videos") gpx_start_time = "2025_03_14_07_00_00_000" diff --git a/tests/integration/test_upload.py b/tests/integration/test_upload.py index 578fd9579..8bc0600be 100644 --- a/tests/integration/test_upload.py +++ b/tests/integration/test_upload.py @@ -97,4 +97,4 @@ def test_upload_wrong_descs(setup_data: py.path.local, setup_upload: py.path.loc ] ) - assert ex.value.returncode == 15, ex.value.stderr \ No newline at end of file + assert ex.value.returncode == 15, ex.value.stderr diff --git a/tests/integration/test_video_process.py b/tests/integration/test_video_process.py index b9f0928c5..f6c5437fa 100644 --- a/tests/integration/test_video_process.py +++ b/tests/integration/test_video_process.py @@ -10,7 +10,7 @@ from .fixtures import ( assert_contains_image_descs, - IS_FFMPEG_INSTALLED, + pytest_skip_if_not_ffmpeg_installed, run_command, run_process_for_descs, setup_data, @@ -25,8 +25,7 @@ def test_sample_video_relpath(): - if not IS_FFMPEG_INSTALLED: - pytest.skip("skip because ffmpeg not installed") + pytest_skip_if_not_ffmpeg_installed() with tempfile.TemporaryDirectory() as dir: run_sample_video(["--rerun", "tests/data/gopro_data/hero8.mp4", str(dir)]) @@ -44,8 +43,7 @@ def test_sample_video_relpath(): def test_sample_video_without_video_time(setup_data: py.path.local): - if not IS_FFMPEG_INSTALLED: - pytest.skip("skip because ffmpeg not installed") + pytest_skip_if_not_ffmpeg_installed() video_dir = setup_data.join("videos") root_sample_dir = video_dir.join("mapillary_sampled_video_frames") @@ -79,8 +77,7 @@ def test_sample_video_without_video_time(setup_data: py.path.local): def test_sample_video_specify_video_start_time(setup_data: py.path.local): - if not IS_FFMPEG_INSTALLED: - pytest.skip("skip because ffmpeg not installed") + pytest_skip_if_not_ffmpeg_installed() video_dir = setup_data.join("videos") root_sample_dir = video_dir.join("mapillary_sampled_video_frames") @@ -110,8 +107,7 @@ def test_sample_video_specify_video_start_time(setup_data: py.path.local): def test_video_process(setup_data: py.path.local): - if not IS_FFMPEG_INSTALLED: - pytest.skip("skip because ffmpeg not installed") + pytest_skip_if_not_ffmpeg_installed() video_dir = setup_data.join("videos") gpx_file = setup_data.join("gpx").join("sf_30km_h.gpx") @@ -135,8 +131,7 @@ def test_video_process(setup_data: py.path.local): def test_video_process_sample_with_multiple_distances(setup_data: py.path.local): - if not IS_FFMPEG_INSTALLED: - pytest.skip("skip because ffmpeg not installed") + pytest_skip_if_not_ffmpeg_installed() video_dir = setup_data.join("gopro_data") for distance in [0, 2.4, 100]: @@ -156,8 +151,7 @@ def test_video_process_sample_with_multiple_distances(setup_data: py.path.local) def test_video_process_sample_with_distance(setup_data: py.path.local): - if not IS_FFMPEG_INSTALLED: - pytest.skip("skip because ffmpeg not installed") + pytest_skip_if_not_ffmpeg_installed() video_dir = setup_data.join("gopro_data") sample_dir = Path(setup_data, "gopro_data", "my_samples") @@ -247,8 +241,7 @@ def test_video_process_sample_with_distance(setup_data: py.path.local): def test_video_process_multiple_videos(setup_data: py.path.local): - if not IS_FFMPEG_INSTALLED: - pytest.skip("skip because ffmpeg not installed") + pytest_skip_if_not_ffmpeg_installed() sub_folder = setup_data.join("video_sub_folder").mkdir() video_path = setup_data.join("videos").join("sample-5s.mp4") diff --git a/tests/unit/test_ffmpeg.py b/tests/unit/test_ffmpeg.py index 3079e7eac..6c0b016e5 100644 --- a/tests/unit/test_ffmpeg.py +++ b/tests/unit/test_ffmpeg.py @@ -8,12 +8,12 @@ from mapillary_tools import ffmpeg -from ..integration.fixtures import IS_FFMPEG_INSTALLED, setup_data +from ..integration.fixtures import pytest_skip_if_not_ffmpeg_installed, setup_data def test_ffmpeg_run_ok(): - if not IS_FFMPEG_INSTALLED: - pytest.skip("ffmpeg not installed") + pytest_skip_if_not_ffmpeg_installed() + ff = ffmpeg.FFMPEG() ff.run_ffmpeg_non_interactive(["-version"]) @@ -23,15 +23,14 @@ def test_ffmpeg_run_ok(): raises=ffmpeg.FFmpegCalledProcessError, ) def test_ffmpeg_run_raise(): - if not IS_FFMPEG_INSTALLED: - pytest.skip("ffmpeg not installed") + pytest_skip_if_not_ffmpeg_installed() + ff = ffmpeg.FFMPEG() ff.run_ffmpeg_non_interactive(["foo"]) def test_ffmpeg_extract_frames_ok(setup_data: py.path.local): - if not IS_FFMPEG_INSTALLED: - pytest.skip("ffmpeg not installed") + pytest_skip_if_not_ffmpeg_installed() ff = ffmpeg.FFMPEG() @@ -59,8 +58,7 @@ def test_ffmpeg_extract_frames_ok(setup_data: py.path.local): def test_ffmpeg_extract_frames_with_specifier_ok(setup_data: py.path.local): - if not IS_FFMPEG_INSTALLED: - pytest.skip("ffmpeg not installed") + pytest_skip_if_not_ffmpeg_installed() ff = ffmpeg.FFMPEG() @@ -93,8 +91,7 @@ def test_ffmpeg_extract_frames_with_specifier_ok(setup_data: py.path.local): def test_ffmpeg_extract_specified_frames_ok(setup_data: py.path.local): - if not IS_FFMPEG_INSTALLED: - pytest.skip("ffmpeg not installed") + pytest_skip_if_not_ffmpeg_installed() ff = ffmpeg.FFMPEG() @@ -115,8 +112,7 @@ def test_ffmpeg_extract_specified_frames_ok(setup_data: py.path.local): def test_ffmpeg_extract_specified_frames_empty_ok(setup_data: py.path.local): - if not IS_FFMPEG_INSTALLED: - pytest.skip("ffmpeg not installed") + pytest_skip_if_not_ffmpeg_installed() ff = ffmpeg.FFMPEG() @@ -132,8 +128,7 @@ def test_ffmpeg_extract_specified_frames_empty_ok(setup_data: py.path.local): def test_probe_format_and_streams_ok(setup_data: py.path.local): - if not IS_FFMPEG_INSTALLED: - pytest.skip("ffmpeg not installed") + pytest_skip_if_not_ffmpeg_installed() video_path = Path(setup_data.join("videos/sample-5s.mp4")) @@ -150,8 +145,7 @@ def test_probe_format_and_streams_ok(setup_data: py.path.local): def test_probe_format_and_streams_gopro_ok(setup_data: py.path.local): - if not IS_FFMPEG_INSTALLED: - pytest.skip("ffmpeg not installed") + pytest_skip_if_not_ffmpeg_installed() video_path = Path(setup_data.join("gopro_data/hero8.mp4")) @@ -169,8 +163,7 @@ def test_probe_format_and_streams_gopro_ok(setup_data: py.path.local): def test_ffmpeg_not_exists(): - if not IS_FFMPEG_INSTALLED: - pytest.skip("ffmpeg not installed") + pytest_skip_if_not_ffmpeg_installed() ff = ffmpeg.FFMPEG() try: @@ -194,8 +187,7 @@ def test_ffmpeg_not_exists(): def test_ffprobe_not_exists(): - if not IS_FFMPEG_INSTALLED: - pytest.skip("ffmpeg not installed") + pytest_skip_if_not_ffmpeg_installed() ff = ffmpeg.FFMPEG() try: From e2918808b1d0c932e6a6d414f8fc2b7bbea406f8 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Sun, 29 Jun 2025 17:16:44 -0700 Subject: [PATCH 18/26] deduplicate points --- mapillary_tools/blackvue_parser.py | 25 ++++++++++-- mapillary_tools/exiftool_read_video.py | 56 ++++++++++++++++++++------ 2 files changed, 64 insertions(+), 17 deletions(-) diff --git a/mapillary_tools/blackvue_parser.py b/mapillary_tools/blackvue_parser.py index 29c6966ab..56a696e3e 100644 --- a/mapillary_tools/blackvue_parser.py +++ b/mapillary_tools/blackvue_parser.py @@ -46,7 +46,7 @@ def extract_blackvue_info(fp: T.BinaryIO) -> BlackVueInfo | None: if gps_data is None: return None - points = list(_parse_gps_box(gps_data)) + points = _parse_gps_box(gps_data) points.sort(key=lambda p: p.time) if points: @@ -114,7 +114,7 @@ def _extract_camera_model_from_cprt(cprt_bytes: bytes) -> str: return "" -def _parse_gps_box(gps_data: bytes) -> T.Generator[geo.Point, None, None]: +def _parse_gps_box(gps_data: bytes) -> list[geo.Point]: """ >>> list(_parse_gps_box(b"[1623057074211]$GPGGA,202530.00,5109.0262,N,11401.8407,W,5,40,0.5,1097.36,M,-17.00,M,18,TSTR*61")) [Point(time=1623057074211, lat=51.150436666666664, lon=-114.03067833333333, alt=1097.36, angle=None)] @@ -131,6 +131,8 @@ def _parse_gps_box(gps_data: bytes) -> T.Generator[geo.Point, None, None]: >>> list(_parse_gps_box(b"[1623057074211]$GPVTG,,T,,M,0.078,N,0.144,K,D*28[1623057075215]")) [] """ + points_by_sentence_type: dict[str, list[geo.Point]] = {} + for line_bytes in gps_data.splitlines(): match = NMEA_LINE_REGEX.match(line_bytes) if match is None: @@ -159,20 +161,35 @@ def _parse_gps_box(gps_data: bytes) -> T.Generator[geo.Point, None, None]: if message.sentence_type in ["GGA"]: if not message.is_valid: continue - yield geo.Point( + point = geo.Point( time=epoch_ms, lat=message.latitude, lon=message.longitude, alt=message.altitude, angle=None, ) + points_by_sentence_type.setdefault(message.sentence_type, []).append(point) + elif message.sentence_type in ["RMC", "GLL"]: if not message.is_valid: continue - yield geo.Point( + point = geo.Point( time=epoch_ms, lat=message.latitude, lon=message.longitude, alt=None, angle=None, ) + points_by_sentence_type.setdefault(message.sentence_type, []).append(point) + + # This is the extraction order in exiftool + if "RMC" in points_by_sentence_type: + return points_by_sentence_type["RMC"] + + if "GGA" in points_by_sentence_type: + return points_by_sentence_type["GGA"] + + if "GLL" in points_by_sentence_type: + return points_by_sentence_type["GLL"] + + return [] diff --git a/mapillary_tools/exiftool_read_video.py b/mapillary_tools/exiftool_read_video.py index 9973097de..8c9338b24 100644 --- a/mapillary_tools/exiftool_read_video.py +++ b/mapillary_tools/exiftool_read_video.py @@ -82,6 +82,32 @@ def _extract_alternative_fields( return None +def _same_gps_point(left: GPSPoint, right: GPSPoint) -> bool: + """ + >>> left = GPSPoint(time=56.0, lat=36.741385, lon=29.021274, alt=141.6, angle=1.54, epoch_time=None, fix=None, precision=None, ground_speed=None) + >>> right = GPSPoint(time=56.0, lat=36.741385, lon=29.021274, alt=142.4, angle=1.54, epoch_time=None, fix=None, precision=None, ground_speed=None) + >>> _same_gps_point(left, right) + True + """ + return ( + left.time == right.time + and left.lon == right.lon + and left.lat == right.lat + and left.epoch_time == right.epoch_time + and left.angle == right.angle + ) + + +def _deduplicate_gps_points( + track: list[GPSPoint], same_gps_point: T.Callable[[GPSPoint, GPSPoint], bool] +) -> list[GPSPoint]: + deduplicated_track: list[GPSPoint] = [] + for point in track: + if not deduplicated_track or not same_gps_point(deduplicated_track[-1], point): + deduplicated_track.append(point) + return deduplicated_track + + def _aggregate_gps_track( texts_by_tag: dict[str, list[str]], time_tag: str | None, @@ -174,7 +200,7 @@ def _aggregate_float_values_same_length( epoch_time = geo.as_unix_time(dt) # build track - track = [] + track: list[GPSPoint] = [] for timestamp, lon, lat, alt, direction, ground_speed in zip( timestamps, lons, @@ -185,22 +211,26 @@ def _aggregate_float_values_same_length( ): if timestamp is None or lon is None or lat is None: continue - track.append( - GPSPoint( - time=timestamp, - lon=lon, - lat=lat, - alt=alt, - angle=direction, - epoch_time=epoch_time, - fix=None, - precision=None, - ground_speed=ground_speed, - ) + + point = GPSPoint( + time=timestamp, + lon=lon, + lat=lat, + alt=alt, + angle=direction, + epoch_time=epoch_time, + fix=None, + precision=None, + ground_speed=ground_speed, ) + if not track or not _same_gps_point(track[-1], point): + track.append(point) + track.sort(key=lambda point: point.time) + track = _deduplicate_gps_points(track, same_gps_point=_same_gps_point) + if time_tag is not None: if track: first_time = track[0].time From ea66952710e68ab9dcc7ec8504dd47f944827d97 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Sun, 29 Jun 2025 19:28:37 -0700 Subject: [PATCH 19/26] fix tests --- tests/unit/test_blackvue_parser.py | 35 ++++++++---------------------- 1 file changed, 9 insertions(+), 26 deletions(-) diff --git a/tests/unit/test_blackvue_parser.py b/tests/unit/test_blackvue_parser.py index 6cb1eb014..7d4f145fb 100644 --- a/tests/unit/test_blackvue_parser.py +++ b/tests/unit/test_blackvue_parser.py @@ -39,29 +39,12 @@ def test_parse_points(): box = {"type": b"free", "data": [{"type": b"gps ", "data": gps_data}]} data = cparser.Box32ConstructBuilder({b"free": {}}).Box.build(box) info = blackvue_parser.extract_blackvue_info(io.BytesIO(data)) - assert info is not None - assert [ - geo.Point( - time=0.0, lat=38.8861575, lon=-76.99239516666667, alt=10.2, angle=None - ), - geo.Point( - time=0.002, - lat=38.8861575, - lon=-76.99239516666667, - alt=None, - angle=None, - ), - geo.Point( - time=0.003, - lat=38.88615816666667, - lon=-76.992434, - alt=None, - angle=None, - ), - geo.Point( - time=0.968, lat=38.88615816666667, lon=-76.992434, alt=7.7, angle=None - ), - geo.Point( - time=1.005, lat=38.88615816666667, lon=-76.992434, alt=7.7, angle=None - ), - ] == list(info.gps or []) + assert info == blackvue_parser.BlackVueInfo( + gps=[ + geo.Point( + time=0.0, lat=38.88615816666667, lon=-76.992434, alt=None, angle=None + ) + ], + make="BlackVue", + model="", + ) From 3c3d228b9f6a4f06447551709a0f3686bb4ef27e Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Sun, 29 Jun 2025 19:46:20 -0700 Subject: [PATCH 20/26] fix tests --- mapillary_tools/upload.py | 19 +++++++++++++------ tests/integration/test_process.py | 13 ++++++++++++- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/mapillary_tools/upload.py b/mapillary_tools/upload.py index 014e28bd8..356015740 100644 --- a/mapillary_tools/upload.py +++ b/mapillary_tools/upload.py @@ -671,18 +671,25 @@ def _load_valid_metadatas_from_desc_path( if desc_path is None: desc_path = _find_desc_path(import_paths) - with ( - contextlib.nullcontext(sys.stdin.buffer) - if desc_path == "-" - else open(desc_path, "rb") - ) as fp: + if desc_path == "-": try: - metadatas = DescriptionJSONSerializer.deserialize_stream(fp) + metadatas = DescriptionJSONSerializer.deserialize_stream(sys.stdin.buffer) except json.JSONDecodeError as ex: raise exceptions.MapillaryInvalidDescriptionFile( f"Invalid JSON stream from {desc_path}: {ex}" ) from ex + else: + if not os.path.isfile(desc_path): + raise exceptions.MapillaryFileNotFoundError(f"Description file not found: {desc_path}") + with open(desc_path, "rb") as fp: + try: + metadatas = DescriptionJSONSerializer.deserialize_stream(fp) + except json.JSONDecodeError as ex: + raise exceptions.MapillaryInvalidDescriptionFile( + f"Invalid JSON stream from {desc_path}: {ex}" + ) from ex + return metadatas diff --git a/tests/integration/test_process.py b/tests/integration/test_process.py index a6567becc..59335bd21 100644 --- a/tests/integration/test_process.py +++ b/tests/integration/test_process.py @@ -5,6 +5,7 @@ from pathlib import Path import py.path +import pytest from .fixtures import ( assert_contains_image_descs, @@ -294,7 +295,10 @@ def test_parse_adobe_coordinates(setup_data: py.path.local): ) -def test_zip(tmpdir: py.path.local, setup_data: py.path.local): +def test_zip_ok(tmpdir: py.path.local, setup_data: py.path.local): + # Generate description file in the setup_data directory + run_command(["--file_types=image", str(setup_data)], command="process") + zip_dir = tmpdir.mkdir("zip_dir") run_command([str(setup_data), str(zip_dir)], command="zip") assert 0 < len(zip_dir.listdir()) @@ -302,6 +306,13 @@ def test_zip(tmpdir: py.path.local, setup_data: py.path.local): validate_and_extract_zip(Path(file)) +def test_zip_desc_not_found(tmpdir: py.path.local, setup_data: py.path.local): + zip_dir = tmpdir.mkdir("zip_dir") + with pytest.raises(subprocess.CalledProcessError) as exc_info: + run_command([str(setup_data), str(zip_dir)], command="zip") + assert exc_info.value.returncode == 3 + + def test_process_boolean_options(setup_data: py.path.local): boolean_options = [ "--interpolate_directions", From c9617dae3b1ac20d6c47c0e76391ff54ae3def77 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Sun, 29 Jun 2025 19:47:27 -0700 Subject: [PATCH 21/26] lint --- mapillary_tools/upload.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mapillary_tools/upload.py b/mapillary_tools/upload.py index 356015740..00f11874b 100644 --- a/mapillary_tools/upload.py +++ b/mapillary_tools/upload.py @@ -1,6 +1,5 @@ from __future__ import annotations -import contextlib import json import logging import os From 87afa883903a4ab1616b3220c722f73a2522cd73 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Sun, 29 Jun 2025 19:49:37 -0700 Subject: [PATCH 22/26] format --- mapillary_tools/upload.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mapillary_tools/upload.py b/mapillary_tools/upload.py index 00f11874b..99ba0ae49 100644 --- a/mapillary_tools/upload.py +++ b/mapillary_tools/upload.py @@ -680,7 +680,9 @@ def _load_valid_metadatas_from_desc_path( else: if not os.path.isfile(desc_path): - raise exceptions.MapillaryFileNotFoundError(f"Description file not found: {desc_path}") + raise exceptions.MapillaryFileNotFoundError( + f"Description file not found: {desc_path}" + ) with open(desc_path, "rb") as fp: try: metadatas = DescriptionJSONSerializer.deserialize_stream(fp) From 86e345a78913b6bddb96771239a4acf5f09c0676 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Sun, 29 Jun 2025 19:53:44 -0700 Subject: [PATCH 23/26] generate docetest --- mapillary_tools/geotag/factory.py | 47 +++++++++++++++++++++++++++++++ mapillary_tools/geotag/options.py | 18 ++++++++++++ 2 files changed, 65 insertions(+) diff --git a/mapillary_tools/geotag/factory.py b/mapillary_tools/geotag/factory.py index a5652c9e8..a1474d052 100644 --- a/mapillary_tools/geotag/factory.py +++ b/mapillary_tools/geotag/factory.py @@ -158,6 +158,9 @@ def _ensure_source_path(option: SourceOption) -> Path: def _build_image_geotag(option: SourceOption) -> base.GeotagImagesFromGeneric | None: + """ + Build a GeotagImagesFromGeneric object based on the provided option. + """ if option.interpolation is None: interpolation = InterpolationOption() else: @@ -213,6 +216,50 @@ def _build_image_geotag(option: SourceOption) -> base.GeotagImagesFromGeneric | def _build_video_geotag(option: SourceOption) -> base.GeotagVideosFromGeneric | None: + """ + Build a GeotagVideosFromGeneric object based on the provided option. + + Examples: + >>> from pathlib import Path + >>> from mapillary_tools.geotag.options import SourceOption, SourceType + >>> opt = SourceOption(SourceType.NATIVE) + >>> geotagger = _build_video_geotag(opt) + >>> geotagger.__class__.__name__ + 'GeotagVideosFromVideo' + + >>> opt = SourceOption(SourceType.EXIFTOOL_RUNTIME) + >>> geotagger = _build_video_geotag(opt) + >>> geotagger.__class__.__name__ + 'GeotagVideosFromExifToolRunner' + + >>> opt = SourceOption(SourceType.EXIFTOOL_XML, source_path=Path("/tmp/test.xml")) + >>> geotagger = _build_video_geotag(opt) + >>> geotagger.__class__.__name__ + 'GeotagVideosFromExifToolXML' + + >>> opt = SourceOption(SourceType.GPX, source_path=Path("/tmp/test.gpx")) + >>> geotagger = _build_video_geotag(opt) + >>> geotagger.__class__.__name__ + 'GeotagVideosFromGPX' + + >>> opt = SourceOption(SourceType.NMEA, source_path=Path("/tmp/test.nmea")) + >>> _build_video_geotag(opt) is None + True + + >>> opt = SourceOption(SourceType.EXIF) + >>> _build_video_geotag(opt) is None + True + + >>> opt = SourceOption(SourceType.GOPRO) + >>> _build_video_geotag(opt) is None + True + + >>> try: + ... _build_video_geotag(SourceOption("invalid")) + ... except ValueError as e: + ... "Invalid geotag source" in str(e) + True + """ if option.source is SourceType.NATIVE: return geotag_videos_from_video.GeotagVideosFromVideo( num_processes=option.num_processes, filetypes=option.filetypes diff --git a/mapillary_tools/geotag/options.py b/mapillary_tools/geotag/options.py index c88ecadfe..81e1cf4d3 100644 --- a/mapillary_tools/geotag/options.py +++ b/mapillary_tools/geotag/options.py @@ -89,6 +89,24 @@ def __post_init__(self): raise ValueError("Either pattern or source_path must be provided") def resolve(self, path: Path) -> Path: + """ + Resolve the source path or pattern against the given path. + + Examples: + >>> from pathlib import Path + >>> opt = SourcePathOption(source_path=Path("/foo/bar.mp4")) + >>> opt.resolve(Path("/baz/qux.mp4")) + PosixPath('/foo/bar.mp4') + + >>> opt = SourcePathOption(pattern="videos/%g_sub%e") + >>> opt.resolve(Path("/data/video1.mp4")) + PosixPath('/data/videos/video1_sub.mp4') + + >>> opt = SourcePathOption(pattern="/abs/path/%f") + >>> opt.resolve(Path("/tmp/abc.mov")) + PosixPath('/abs/path/abc.mov') + """ + if self.source_path is not None: return self.source_path From feb9e77f5029649f6d17b669c953553c52faff55 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Sun, 29 Jun 2025 20:03:18 -0700 Subject: [PATCH 24/26] fix tempfile in windows --- tests/integration/fixtures.py | 36 +++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/tests/integration/fixtures.py b/tests/integration/fixtures.py index 1666ca85c..6c00abe14 100644 --- a/tests/integration/fixtures.py +++ b/tests/integration/fixtures.py @@ -389,20 +389,28 @@ def run_command(params: list[str], command: str, **kwargs): def run_process_for_descs(params: list[str], command: str = "process", **kwargs): - with tempfile.NamedTemporaryFile(suffix=".json") as desc_file: - run_command( - [ - "--skip_process_errors", - *["--desc_path", str(desc_file.name)], - *params, - ], - command, - **kwargs, - ) - - with open(desc_file.name, "r") as fp: - fp.seek(0) - return json.load(fp) + # Make windows happy with delete=False + with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as desc_file: + try: + run_command( + [ + "--skip_process_errors", + *["--desc_path", str(desc_file.name)], + *params, + ], + command, + **kwargs, + ) + + with open(desc_file.name, "r") as fp: + fp.seek(0) + return json.load(fp) + + finally: + try: + os.remove(desc_file.name) + except FileNotFoundError: + pass def run_process_and_upload_for_descs( From bab0d8a12ee293066755dffed516d57366a1498c Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Sun, 29 Jun 2025 20:09:34 -0700 Subject: [PATCH 25/26] fix --- tests/integration/fixtures.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/tests/integration/fixtures.py b/tests/integration/fixtures.py index 6c00abe14..ac58e3afb 100644 --- a/tests/integration/fixtures.py +++ b/tests/integration/fixtures.py @@ -5,6 +5,7 @@ import shlex import shutil import subprocess +import sys import tempfile import zipfile from pathlib import Path @@ -390,6 +391,11 @@ def run_command(params: list[str], command: str, **kwargs): def run_process_for_descs(params: list[str], command: str = "process", **kwargs): # Make windows happy with delete=False + # https://github.com/mapillary/mapillary_tools/issues/503 + if sys.platform in ["win32"]: + delete = False + else: + delete = True with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as desc_file: try: run_command( @@ -406,11 +412,14 @@ def run_process_for_descs(params: list[str], command: str = "process", **kwargs) fp.seek(0) return json.load(fp) + if not delete: + desc_file.close() finally: - try: - os.remove(desc_file.name) - except FileNotFoundError: - pass + if not delete: + try: + os.remove(desc_file.name) + except FileNotFoundError: + pass def run_process_and_upload_for_descs( From 05d15e0b2889b62f7b9f208656a087b9e9725172 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Sun, 29 Jun 2025 20:13:58 -0700 Subject: [PATCH 26/26] fix --- tests/integration/fixtures.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/integration/fixtures.py b/tests/integration/fixtures.py index ac58e3afb..5c86f9554 100644 --- a/tests/integration/fixtures.py +++ b/tests/integration/fixtures.py @@ -410,10 +410,11 @@ def run_process_for_descs(params: list[str], command: str = "process", **kwargs) with open(desc_file.name, "r") as fp: fp.seek(0) - return json.load(fp) + descs = json.load(fp) if not delete: desc_file.close() + finally: if not delete: try: @@ -421,6 +422,8 @@ def run_process_for_descs(params: list[str], command: str = "process", **kwargs) except FileNotFoundError: pass + return descs + def run_process_and_upload_for_descs( params: list[str], command="process_and_upload", **kwargs