diff --git a/mapillary_tools/blackvue_parser.py b/mapillary_tools/blackvue_parser.py index 29c6966ab..56a696e3e 100644 --- a/mapillary_tools/blackvue_parser.py +++ b/mapillary_tools/blackvue_parser.py @@ -46,7 +46,7 @@ def extract_blackvue_info(fp: T.BinaryIO) -> BlackVueInfo | None: if gps_data is None: return None - points = list(_parse_gps_box(gps_data)) + points = _parse_gps_box(gps_data) points.sort(key=lambda p: p.time) if points: @@ -114,7 +114,7 @@ def _extract_camera_model_from_cprt(cprt_bytes: bytes) -> str: return "" -def _parse_gps_box(gps_data: bytes) -> T.Generator[geo.Point, None, None]: +def _parse_gps_box(gps_data: bytes) -> list[geo.Point]: """ >>> list(_parse_gps_box(b"[1623057074211]$GPGGA,202530.00,5109.0262,N,11401.8407,W,5,40,0.5,1097.36,M,-17.00,M,18,TSTR*61")) [Point(time=1623057074211, lat=51.150436666666664, lon=-114.03067833333333, alt=1097.36, angle=None)] @@ -131,6 +131,8 @@ def _parse_gps_box(gps_data: bytes) -> T.Generator[geo.Point, None, None]: >>> list(_parse_gps_box(b"[1623057074211]$GPVTG,,T,,M,0.078,N,0.144,K,D*28[1623057075215]")) [] """ + points_by_sentence_type: dict[str, list[geo.Point]] = {} + for line_bytes in gps_data.splitlines(): match = NMEA_LINE_REGEX.match(line_bytes) if match is None: @@ -159,20 +161,35 @@ def _parse_gps_box(gps_data: bytes) -> T.Generator[geo.Point, None, None]: if message.sentence_type in ["GGA"]: if not message.is_valid: continue - yield geo.Point( + point = geo.Point( time=epoch_ms, lat=message.latitude, lon=message.longitude, alt=message.altitude, angle=None, ) + points_by_sentence_type.setdefault(message.sentence_type, []).append(point) + elif message.sentence_type in ["RMC", "GLL"]: if not message.is_valid: continue - yield geo.Point( + point = geo.Point( time=epoch_ms, lat=message.latitude, lon=message.longitude, alt=None, angle=None, ) + points_by_sentence_type.setdefault(message.sentence_type, []).append(point) + + # This is the extraction order in exiftool + if "RMC" in points_by_sentence_type: + return points_by_sentence_type["RMC"] + + if "GGA" in points_by_sentence_type: + return points_by_sentence_type["GGA"] + + if "GLL" in points_by_sentence_type: + return points_by_sentence_type["GLL"] + + return [] diff --git a/mapillary_tools/exiftool_read_video.py b/mapillary_tools/exiftool_read_video.py index 9973097de..8c9338b24 100644 --- a/mapillary_tools/exiftool_read_video.py +++ b/mapillary_tools/exiftool_read_video.py @@ -82,6 +82,32 @@ def _extract_alternative_fields( return None +def _same_gps_point(left: GPSPoint, right: GPSPoint) -> bool: + """ + >>> left = GPSPoint(time=56.0, lat=36.741385, lon=29.021274, alt=141.6, angle=1.54, epoch_time=None, fix=None, precision=None, ground_speed=None) + >>> right = GPSPoint(time=56.0, lat=36.741385, lon=29.021274, alt=142.4, angle=1.54, epoch_time=None, fix=None, precision=None, ground_speed=None) + >>> _same_gps_point(left, right) + True + """ + return ( + left.time == right.time + and left.lon == right.lon + and left.lat == right.lat + and left.epoch_time == right.epoch_time + and left.angle == right.angle + ) + + +def _deduplicate_gps_points( + track: list[GPSPoint], same_gps_point: T.Callable[[GPSPoint, GPSPoint], bool] +) -> list[GPSPoint]: + deduplicated_track: list[GPSPoint] = [] + for point in track: + if not deduplicated_track or not same_gps_point(deduplicated_track[-1], point): + deduplicated_track.append(point) + return deduplicated_track + + def _aggregate_gps_track( texts_by_tag: dict[str, list[str]], time_tag: str | None, @@ -174,7 +200,7 @@ def 
_aggregate_float_values_same_length( epoch_time = geo.as_unix_time(dt) # build track - track = [] + track: list[GPSPoint] = [] for timestamp, lon, lat, alt, direction, ground_speed in zip( timestamps, lons, @@ -185,22 +211,26 @@ def _aggregate_float_values_same_length( ): if timestamp is None or lon is None or lat is None: continue - track.append( - GPSPoint( - time=timestamp, - lon=lon, - lat=lat, - alt=alt, - angle=direction, - epoch_time=epoch_time, - fix=None, - precision=None, - ground_speed=ground_speed, - ) + + point = GPSPoint( + time=timestamp, + lon=lon, + lat=lat, + alt=alt, + angle=direction, + epoch_time=epoch_time, + fix=None, + precision=None, + ground_speed=ground_speed, ) + if not track or not _same_gps_point(track[-1], point): + track.append(point) + track.sort(key=lambda point: point.time) + track = _deduplicate_gps_points(track, same_gps_point=_same_gps_point) + if time_tag is not None: if track: first_time = track[0].time diff --git a/mapillary_tools/exiftool_runner.py b/mapillary_tools/exiftool_runner.py index 7327b6a63..4987409c0 100644 --- a/mapillary_tools/exiftool_runner.py +++ b/mapillary_tools/exiftool_runner.py @@ -1,7 +1,5 @@ from __future__ import annotations -import platform -import shutil import subprocess import typing as T from pathlib import Path @@ -12,32 +10,13 @@ class ExiftoolRunner: Wrapper around ExifTool to run it in a subprocess """ - def __init__(self, exiftool_path: str | None = None, recursive: bool = False): - if exiftool_path is None: - exiftool_path = self._search_preferred_exiftool_path() - self.exiftool_path = exiftool_path + def __init__(self, exiftool_executable: str = "exiftool", recursive: bool = False): + self.exiftool_executable = exiftool_executable self.recursive = recursive - def _search_preferred_exiftool_path(self) -> str: - system = platform.system() - - if system and system.lower() == "windows": - exiftool_paths = ["exiftool.exe", "exiftool"] - else: - exiftool_paths = ["exiftool", "exiftool.exe"] - - for path in exiftool_paths: - full_path = shutil.which(path) - if full_path: - return path - - # Always return the prefered one, even if it is not found, - # and let the subprocess.run figure out the error later - return exiftool_paths[0] - def _build_args_read_stdin(self) -> list[str]: args: list[str] = [ - self.exiftool_path, + self.exiftool_executable, "-fast", "-q", "-n", # Disable print conversion diff --git a/mapillary_tools/geotag/__init__.py b/mapillary_tools/geotag/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/mapillary_tools/geotag/factory.py b/mapillary_tools/geotag/factory.py index def91c7a7..a1474d052 100644 --- a/mapillary_tools/geotag/factory.py +++ b/mapillary_tools/geotag/factory.py @@ -6,7 +6,6 @@ from pathlib import Path from .. import exceptions, types, utils -from ..types import FileType from . 
import ( base, geotag_images_from_exif, @@ -70,8 +69,25 @@ def process( for idx, option in enumerate(options): LOG.debug("Processing %d files with %s", len(reprocessable_paths), option) - image_metadata_or_errors = _geotag_images(reprocessable_paths, option) - video_metadata_or_errors = _geotag_videos(reprocessable_paths, option) + image_paths, video_paths = _filter_images_and_videos( + reprocessable_paths, option.filetypes + ) + + if image_paths: + image_geotag = _build_image_geotag(option) + image_metadata_or_errors = ( + image_geotag.to_description(image_paths) if image_geotag else [] + ) + else: + image_metadata_or_errors = [] + + if video_paths: + video_geotag = _build_video_geotag(option) + video_metadata_or_errors = ( + video_geotag.to_description(video_paths) if video_geotag else [] + ) + else: + video_metadata_or_errors = [] more_option = idx < len(options) - 1 @@ -97,6 +113,7 @@ def _is_reprocessable(metadata: types.MetadataOrError) -> bool: ( exceptions.MapillaryGeoTaggingError, exceptions.MapillaryVideoGPSNotFoundError, + exceptions.MapillaryExiftoolNotFoundError, ), ): return True @@ -140,152 +157,144 @@ def _ensure_source_path(option: SourceOption) -> Path: return option.source_path.source_path -def _geotag_images( - paths: T.Iterable[Path], option: SourceOption -) -> list[types.ImageMetadataOrError]: - image_paths, _ = _filter_images_and_videos(paths, option.filetypes) - - if not image_paths: - return [] - +def _build_image_geotag(option: SourceOption) -> base.GeotagImagesFromGeneric | None: + """ + Build a GeotagImagesFromGeneric object based on the provided option. + """ if option.interpolation is None: interpolation = InterpolationOption() else: interpolation = option.interpolation - geotag: base.GeotagImagesFromGeneric - - if option.source is SourceType.NATIVE: - geotag = geotag_images_from_exif.GeotagImagesFromEXIF( + if option.source in [SourceType.EXIF, SourceType.NATIVE]: + return geotag_images_from_exif.GeotagImagesFromEXIF( num_processes=option.num_processes ) - return geotag.to_description(image_paths) if option.source is SourceType.EXIFTOOL_RUNTIME: - geotag = geotag_images_from_exiftool.GeotagImagesFromExifToolRunner( + return geotag_images_from_exiftool.GeotagImagesFromExifToolRunner( num_processes=option.num_processes ) - try: - return geotag.to_description(image_paths) - except exceptions.MapillaryExiftoolNotFoundError as ex: - LOG.warning('Skip "%s" because: %s', option.source.value, ex) - return [] elif option.source is SourceType.EXIFTOOL_XML: # This is to ensure 'video_process --geotag={"source": "exiftool_xml", "source_path": "/tmp/xml_path"}' # to work - geotag = geotag_images_from_exiftool.GeotagImagesFromExifToolWithSamples( - xml_path=_ensure_source_path(option), + if option.source_path is None: + raise exceptions.MapillaryBadParameterError( + "source_path must be provided for EXIFTOOL_XML source" + ) + return geotag_images_from_exiftool.GeotagImagesFromExifToolWithSamples( + source_path=option.source_path, num_processes=option.num_processes, ) - return geotag.to_description(image_paths) elif option.source is SourceType.GPX: - geotag = geotag_images_from_gpx_file.GeotagImagesFromGPXFile( + return geotag_images_from_gpx_file.GeotagImagesFromGPXFile( source_path=_ensure_source_path(option), use_gpx_start_time=interpolation.use_gpx_start_time, offset_time=interpolation.offset_time, num_processes=option.num_processes, ) - return geotag.to_description(image_paths) elif option.source is SourceType.NMEA: - geotag =
geotag_images_from_nmea_file.GeotagImagesFromNMEAFile( + return geotag_images_from_nmea_file.GeotagImagesFromNMEAFile( source_path=_ensure_source_path(option), use_gpx_start_time=interpolation.use_gpx_start_time, offset_time=interpolation.offset_time, num_processes=option.num_processes, ) - return geotag.to_description(image_paths) - - elif option.source is SourceType.EXIF: - geotag = geotag_images_from_exif.GeotagImagesFromEXIF( - num_processes=option.num_processes - ) - return geotag.to_description(image_paths) - - elif option.source in [ - SourceType.GOPRO, - SourceType.BLACKVUE, - SourceType.CAMM, - ]: - map_geotag_source_to_filetype: dict[SourceType, FileType] = { - SourceType.GOPRO: FileType.GOPRO, - SourceType.BLACKVUE: FileType.BLACKVUE, - SourceType.CAMM: FileType.CAMM, - } - video_paths = utils.find_videos([_ensure_source_path(option)]) - image_samples_by_video_path = utils.find_all_image_samples( - image_paths, video_paths - ) - video_paths_with_image_samples = list(image_samples_by_video_path.keys()) - video_metadatas = geotag_videos_from_video.GeotagVideosFromVideo( - filetypes={map_geotag_source_to_filetype[option.source]}, - num_processes=option.num_processes, - ).to_description(video_paths_with_image_samples) - geotag = geotag_images_from_video.GeotagImagesFromVideo( - video_metadatas, + elif option.source in [SourceType.GOPRO, SourceType.BLACKVUE, SourceType.CAMM]: + return geotag_images_from_video.GeotagImageSamplesFromVideo( + _ensure_source_path(option), offset_time=interpolation.offset_time, num_processes=option.num_processes, ) - return geotag.to_description(image_paths) else: raise ValueError(f"Invalid geotag source {option.source}") -def _geotag_videos( - paths: T.Iterable[Path], option: SourceOption -) -> list[types.VideoMetadataOrError]: - _, video_paths = _filter_images_and_videos(paths, option.filetypes) - - if not video_paths: - return [] - - geotag: base.GeotagVideosFromGeneric +def _build_video_geotag(option: SourceOption) -> base.GeotagVideosFromGeneric | None: + """ + Build a GeotagVideosFromGeneric object based on the provided option. + Examples: + >>> from pathlib import Path + >>> from mapillary_tools.geotag.options import SourceOption, SourceType + >>> opt = SourceOption(SourceType.NATIVE) + >>> geotagger = _build_video_geotag(opt) + >>> geotagger.__class__.__name__ + 'GeotagVideosFromVideo' + + >>> opt = SourceOption(SourceType.EXIFTOOL_RUNTIME) + >>> geotagger = _build_video_geotag(opt) + >>> geotagger.__class__.__name__ + 'GeotagVideosFromExifToolRunner' + + >>> opt = SourceOption(SourceType.EXIFTOOL_XML, source_path=Path("/tmp/test.xml")) + >>> geotagger = _build_video_geotag(opt) + >>> geotagger.__class__.__name__ + 'GeotagVideosFromExifToolXML' + + >>> opt = SourceOption(SourceType.GPX, source_path=Path("/tmp/test.gpx")) + >>> geotagger = _build_video_geotag(opt) + >>> geotagger.__class__.__name__ + 'GeotagVideosFromGPX' + + >>> opt = SourceOption(SourceType.NMEA, source_path=Path("/tmp/test.nmea")) + >>> _build_video_geotag(opt) is None + True + + >>> opt = SourceOption(SourceType.EXIF) + >>> _build_video_geotag(opt) is None + True + + >>> opt = SourceOption(SourceType.GOPRO) + >>> _build_video_geotag(opt) is None + True + + >>> try: + ... _build_video_geotag(SourceOption("invalid")) + ... except ValueError as e: + ... 
"Invalid geotag source" in str(e) + True + """ if option.source is SourceType.NATIVE: - geotag = geotag_videos_from_video.GeotagVideosFromVideo( + return geotag_videos_from_video.GeotagVideosFromVideo( num_processes=option.num_processes, filetypes=option.filetypes ) - return geotag.to_description(video_paths) if option.source is SourceType.EXIFTOOL_RUNTIME: - geotag = geotag_videos_from_exiftool.GeotagVideosFromExifToolRunner( + return geotag_videos_from_exiftool.GeotagVideosFromExifToolRunner( num_processes=option.num_processes ) - try: - return geotag.to_description(video_paths) - except exceptions.MapillaryExiftoolNotFoundError as ex: - LOG.warning('Skip "%s" because: %s', option.source.value, ex) - return [] elif option.source is SourceType.EXIFTOOL_XML: - geotag = geotag_videos_from_exiftool.GeotagVideosFromExifToolXML( - xml_path=_ensure_source_path(option), + if option.source_path is None: + raise exceptions.MapillaryBadParameterError( + "source_path must be provided for EXIFTOOL_XML source" + ) + return geotag_videos_from_exiftool.GeotagVideosFromExifToolXML( + source_path=option.source_path, ) - return geotag.to_description(video_paths) elif option.source is SourceType.GPX: - geotag = geotag_videos_from_gpx.GeotagVideosFromGPX() - return geotag.to_description(video_paths) + return geotag_videos_from_gpx.GeotagVideosFromGPX( + source_path=option.source_path, num_processes=option.num_processes + ) elif option.source is SourceType.NMEA: # TODO: geotag videos from NMEA - return [] + return None elif option.source is SourceType.EXIF: # Legacy image-specific geotag types - return [] + return None - elif option.source in [ - SourceType.GOPRO, - SourceType.BLACKVUE, - SourceType.CAMM, - ]: + elif option.source in [SourceType.GOPRO, SourceType.BLACKVUE, SourceType.CAMM]: # Legacy image-specific geotag types - return [] + return None else: raise ValueError(f"Invalid geotag source {option.source}") diff --git a/mapillary_tools/geotag/geotag_images_from_exiftool.py b/mapillary_tools/geotag/geotag_images_from_exiftool.py index e5c42ac76..6c74e0a99 100644 --- a/mapillary_tools/geotag/geotag_images_from_exiftool.py +++ b/mapillary_tools/geotag/geotag_images_from_exiftool.py @@ -13,29 +13,25 @@ from .. import constants, exceptions, exiftool_read, types, utils from ..exiftool_runner import ExiftoolRunner +from . 
import options from .base import GeotagImagesFromGeneric from .geotag_images_from_video import GeotagImagesFromVideo from .geotag_videos_from_exiftool import GeotagVideosFromExifToolXML from .image_extractors.exiftool import ImageExifToolExtractor -from .utils import index_rdf_description_by_path LOG = logging.getLogger(__name__) class GeotagImagesFromExifToolXML(GeotagImagesFromGeneric): def __init__( - self, - xml_path: Path, - num_processes: int | None = None, + self, source_path: options.SourcePathOption, num_processes: int | None = None ): - self.xml_path = xml_path + self.source_path = source_path super().__init__(num_processes=num_processes) @classmethod def build_image_extractors( - cls, - rdf_by_path: dict[str, ET.Element], - image_paths: T.Iterable[Path], + cls, rdf_by_path: dict[str, ET.Element], image_paths: T.Iterable[Path] ) -> list[ImageExifToolExtractor | types.ErrorMetadata]: results: list[ImageExifToolExtractor | types.ErrorMetadata] = [] @@ -59,7 +55,9 @@ def build_image_extractors( def _generate_image_extractors( self, image_paths: T.Sequence[Path] ) -> T.Sequence[ImageExifToolExtractor | types.ErrorMetadata]: - rdf_by_path = index_rdf_description_by_path([self.xml_path]) + rdf_by_path = GeotagVideosFromExifToolXML.find_rdf_by_path( + self.source_path, image_paths + ) return self.build_image_extractors(rdf_by_path, image_paths) @@ -68,7 +66,10 @@ class GeotagImagesFromExifToolRunner(GeotagImagesFromGeneric): def _generate_image_extractors( self, image_paths: T.Sequence[Path] ) -> T.Sequence[ImageExifToolExtractor | types.ErrorMetadata]: - runner = ExiftoolRunner(constants.EXIFTOOL_PATH) + if constants.EXIFTOOL_PATH is None: + runner = ExiftoolRunner() + else: + runner = ExiftoolRunner(constants.EXIFTOOL_PATH) LOG.debug( "Extracting XML from %d images with ExifTool command: %s", @@ -78,7 +79,13 @@ def _generate_image_extractors( try: xml = runner.extract_xml(image_paths) except FileNotFoundError as ex: - raise exceptions.MapillaryExiftoolNotFoundError(ex) from ex + exiftool_ex = exceptions.MapillaryExiftoolNotFoundError(ex) + return [ + types.describe_error_metadata( + exiftool_ex, image_path, filetype=types.FileType.IMAGE + ) + for image_path in image_paths + ] try: xml_element = ET.fromstring(xml) @@ -102,29 +109,30 @@ def _generate_image_extractors( class GeotagImagesFromExifToolWithSamples(GeotagImagesFromGeneric): def __init__( self, - xml_path: Path, + source_path: options.SourcePathOption, offset_time: float = 0.0, num_processes: int | None = None, ): super().__init__(num_processes=num_processes) - self.xml_path = xml_path + self.source_path = source_path self.offset_time = offset_time def geotag_samples( self, image_paths: T.Sequence[Path] ) -> list[types.ImageMetadataOrError]: # Find all video paths in self.xml_path - rdf_by_path = index_rdf_description_by_path([self.xml_path]) + rdf_by_path = GeotagVideosFromExifToolXML.find_rdf_by_path( + self.source_path, image_paths + ) video_paths = utils.find_videos( - [Path(pathstr) for pathstr in rdf_by_path.keys()], + [Path(canonical_path) for canonical_path in rdf_by_path.keys()], skip_subfolders=True, ) # Find all video paths that have sample images samples_by_video = utils.find_all_image_samples(image_paths, video_paths) video_metadata_or_errors = GeotagVideosFromExifToolXML( - self.xml_path, - num_processes=self.num_processes, + self.source_path, num_processes=self.num_processes ).to_description(list(samples_by_video.keys())) sample_paths = sum(samples_by_video.values(), []) sample_metadata_or_errors = 
GeotagImagesFromVideo( @@ -146,8 +154,7 @@ def to_description( non_sample_paths = [path for path in image_paths if path not in sample_paths] non_sample_metadata_or_errors = GeotagImagesFromExifToolXML( - self.xml_path, - num_processes=self.num_processes, + self.source_path, num_processes=self.num_processes ).to_description(non_sample_paths) return sample_metadata_or_errors + non_sample_metadata_or_errors diff --git a/mapillary_tools/geotag/geotag_images_from_gpx.py b/mapillary_tools/geotag/geotag_images_from_gpx.py index 2ec0b9d2a..5e0b9317a 100644 --- a/mapillary_tools/geotag/geotag_images_from_gpx.py +++ b/mapillary_tools/geotag/geotag_images_from_gpx.py @@ -20,6 +20,12 @@ LOG = logging.getLogger(__name__) +class SyncMode: + SYNC = "sync" + STRICT_SYNC = "strict_sync" + RESET = "reset" + + class GeotagImagesFromGPX(GeotagImagesFromGeneric): def __init__( self, diff --git a/mapillary_tools/geotag/geotag_images_from_video.py b/mapillary_tools/geotag/geotag_images_from_video.py index 6d032b21b..91468d955 100644 --- a/mapillary_tools/geotag/geotag_images_from_video.py +++ b/mapillary_tools/geotag/geotag_images_from_video.py @@ -13,6 +13,7 @@ from .. import types, utils from .base import GeotagImagesFromGeneric from .geotag_images_from_gpx import GeotagImagesFromGPX +from .geotag_videos_from_video import GeotagVideosFromVideo LOG = logging.getLogger(__name__) @@ -90,3 +91,37 @@ def to_description( assert len(final_image_metadatas) <= len(image_paths) return final_image_metadatas + + +class GeotagImageSamplesFromVideo(GeotagImagesFromGeneric): + def __init__( + self, + source_path: Path, + filetypes: set[types.FileType] | None = None, + offset_time: float = 0.0, + num_processes: int | None = None, + ): + super().__init__(num_processes=num_processes) + self.source_path = source_path + self.filetypes = filetypes + self.offset_time = offset_time + + @override + def to_description( + self, image_paths: T.Sequence[Path] + ) -> list[types.ImageMetadataOrError]: + video_paths = utils.find_videos([self.source_path]) + image_samples_by_video_path = utils.find_all_image_samples( + image_paths, video_paths + ) + video_paths_with_image_samples = list(image_samples_by_video_path.keys()) + video_metadatas = GeotagVideosFromVideo( + filetypes=self.filetypes, + num_processes=self.num_processes, + ).to_description(video_paths_with_image_samples) + geotag = GeotagImagesFromVideo( + video_metadatas, + offset_time=self.offset_time, + num_processes=self.num_processes, + ) + return geotag.to_description(image_paths) diff --git a/mapillary_tools/geotag/geotag_videos_from_exiftool.py b/mapillary_tools/geotag/geotag_videos_from_exiftool.py index 5b7de6839..04bbef4c5 100644 --- a/mapillary_tools/geotag/geotag_videos_from_exiftool.py +++ b/mapillary_tools/geotag/geotag_videos_from_exiftool.py @@ -13,6 +13,7 @@ from .. import constants, exceptions, exiftool_read, types from ..exiftool_runner import ExiftoolRunner +from . 
import options from .base import GeotagVideosFromGeneric from .utils import index_rdf_description_by_path from .video_extractors.exiftool import VideoExifToolExtractor @@ -22,18 +23,14 @@ class GeotagVideosFromExifToolXML(GeotagVideosFromGeneric): def __init__( - self, - xml_path: Path, - num_processes: int | None = None, + self, source_path: options.SourcePathOption, num_processes: int | None = None ): super().__init__(num_processes=num_processes) - self.xml_path = xml_path + self.source_path = source_path @classmethod - def build_image_extractors( - cls, - rdf_by_path: dict[str, ET.Element], - video_paths: T.Iterable[Path], + def build_video_extractors_from_etree( + cls, rdf_by_path: dict[str, ET.Element], video_paths: T.Iterable[Path] ) -> list[VideoExifToolExtractor | types.ErrorMetadata]: results: list[VideoExifToolExtractor | types.ErrorMetadata] = [] @@ -57,8 +54,28 @@ def build_image_extractors( def _generate_video_extractors( self, video_paths: T.Sequence[Path] ) -> T.Sequence[VideoExifToolExtractor | types.ErrorMetadata]: - rdf_by_path = index_rdf_description_by_path([self.xml_path]) - return self.build_image_extractors(rdf_by_path, video_paths) + rdf_by_path = self.find_rdf_by_path(self.source_path, video_paths) + return self.build_video_extractors_from_etree(rdf_by_path, video_paths) + + @classmethod + def find_rdf_by_path( + cls, option: options.SourcePathOption, paths: T.Iterable[Path] + ) -> dict[str, ET.Element]: + if option.source_path is not None: + return index_rdf_description_by_path([option.source_path]) + + elif option.pattern is not None: + rdf_by_path = {} + for path in paths: + source_path = option.resolve(path) + r = index_rdf_description_by_path([source_path]) + rdfs = list(r.values()) + if rdfs: + rdf_by_path[exiftool_read.canonical_path(path)] = rdfs[0] + return rdf_by_path + + else: + assert False, "Either source_path or pattern must be provided" class GeotagVideosFromExifToolRunner(GeotagVideosFromGeneric): @@ -66,7 +83,10 @@ class GeotagVideosFromExifToolRunner(GeotagVideosFromGeneric): def _generate_video_extractors( self, video_paths: T.Sequence[Path] ) -> T.Sequence[VideoExifToolExtractor | types.ErrorMetadata]: - runner = ExiftoolRunner(constants.EXIFTOOL_PATH) + if constants.EXIFTOOL_PATH is None: + runner = ExiftoolRunner() + else: + runner = ExiftoolRunner(constants.EXIFTOOL_PATH) LOG.debug( "Extracting XML from %d videos with ExifTool command: %s", @@ -76,7 +96,13 @@ def _generate_video_extractors( try: xml = runner.extract_xml(video_paths) except FileNotFoundError as ex: - raise exceptions.MapillaryExiftoolNotFoundError(ex) from ex + exiftool_ex = exceptions.MapillaryExiftoolNotFoundError(ex) + return [ + types.describe_error_metadata( + exiftool_ex, video_path, filetype=types.FileType.VIDEO + ) + for video_path in video_paths + ] try: xml_element = ET.fromstring(xml) @@ -92,6 +118,6 @@ def _generate_video_extractors( xml_element ) - return GeotagVideosFromExifToolXML.build_image_extractors( + return GeotagVideosFromExifToolXML.build_video_extractors_from_etree( rdf_by_path, video_paths ) diff --git a/mapillary_tools/geotag/geotag_videos_from_gpx.py b/mapillary_tools/geotag/geotag_videos_from_gpx.py index a5e8afd85..ae9fb1851 100644 --- a/mapillary_tools/geotag/geotag_videos_from_gpx.py +++ b/mapillary_tools/geotag/geotag_videos_from_gpx.py @@ -10,6 +10,7 @@ else: from typing_extensions import override +from .. import exceptions, types from . 
import options from .base import GeotagVideosFromGeneric from .video_extractors.gpx import GPXVideoExtractor @@ -21,19 +22,31 @@ class GeotagVideosFromGPX(GeotagVideosFromGeneric): def __init__( self, - option: options.SourcePathOption | None = None, + source_path: options.SourcePathOption | None = None, num_processes: int | None = None, ): super().__init__(num_processes=num_processes) - if option is None: - option = options.SourcePathOption(pattern="%f.gpx") - self.option = option + if source_path is None: + source_path = options.SourcePathOption(pattern="%g.gpx") + self.source_path = source_path @override def _generate_video_extractors( self, video_paths: T.Sequence[Path] - ) -> T.Sequence[GPXVideoExtractor]: - return [ - GPXVideoExtractor(video_path, self.option.resolve(video_path)) - for video_path in video_paths - ] + ) -> T.Sequence[GPXVideoExtractor | types.ErrorMetadata]: + results: list[GPXVideoExtractor | types.ErrorMetadata] = [] + for video_path in video_paths: + source_path = self.source_path.resolve(video_path) + if source_path.is_file(): + results.append(GPXVideoExtractor(video_path, source_path)) + else: + results.append( + types.describe_error_metadata( + exceptions.MapillaryVideoGPSNotFoundError( + "GPX file not found for video" + ), + filename=video_path, + filetype=types.FileType.VIDEO, + ) + ) + return results diff --git a/mapillary_tools/geotag/options.py b/mapillary_tools/geotag/options.py index c3c243e61..81e1cf4d3 100644 --- a/mapillary_tools/geotag/options.py +++ b/mapillary_tools/geotag/options.py @@ -60,9 +60,13 @@ def from_dict(cls, data: dict[str, T.Any]) -> SourceOption: elif k == "filetypes": kwargs[k] = {types.FileType(t) for t in v} elif k == "source_path": - kwargs.setdefault("source_path", SourcePathOption()).source_path = v + kwargs.setdefault( + "source_path", SourcePathOption(source_path=Path(v)) + ).source_path = Path(v) elif k == "pattern": - kwargs.setdefault("source_path", SourcePathOption()).pattern = v + kwargs.setdefault( + "source_path", SourcePathOption(pattern=v) + ).pattern = v elif k == "interpolation_offset_time": kwargs.setdefault( "interpolation", InterpolationOption() @@ -85,6 +89,24 @@ def __post_init__(self): raise ValueError("Either pattern or source_path must be provided") def resolve(self, path: Path) -> Path: + """ + Resolve the source path or pattern against the given path.
+ + Examples: + >>> from pathlib import Path + >>> opt = SourcePathOption(source_path=Path("/foo/bar.mp4")) + >>> opt.resolve(Path("/baz/qux.mp4")) + PosixPath('/foo/bar.mp4') + + >>> opt = SourcePathOption(pattern="videos/%g_sub%e") + >>> opt.resolve(Path("/data/video1.mp4")) + PosixPath('/data/videos/video1_sub.mp4') + + >>> opt = SourcePathOption(pattern="/abs/path/%f") + >>> opt.resolve(Path("/tmp/abc.mov")) + PosixPath('/abs/path/abc.mov') + """ + if self.source_path is not None: return self.source_path @@ -140,7 +162,7 @@ class InterpolationOption: "type": "integer", }, "interpolation_offset_time": { - "type": "float", + "type": "number", }, "interpolation_use_gpx_start_time": { "type": "boolean", diff --git a/mapillary_tools/geotag/video_extractors/base.py b/mapillary_tools/geotag/video_extractors/base.py index 1a2e03b3c..7bf7a34ba 100644 --- a/mapillary_tools/geotag/video_extractors/base.py +++ b/mapillary_tools/geotag/video_extractors/base.py @@ -14,5 +14,5 @@ class BaseVideoExtractor(abc.ABC): def __init__(self, video_path: Path): self.video_path = video_path - def extract(self) -> types.VideoMetadataOrError: + def extract(self) -> types.VideoMetadata: raise NotImplementedError diff --git a/mapillary_tools/geotag/video_extractors/exiftool.py b/mapillary_tools/geotag/video_extractors/exiftool.py index bb51863a5..e2e0f6956 100644 --- a/mapillary_tools/geotag/video_extractors/exiftool.py +++ b/mapillary_tools/geotag/video_extractors/exiftool.py @@ -21,7 +21,7 @@ def __init__(self, video_path: Path, element: ET.Element): self.element = element @override - def extract(self) -> types.VideoMetadataOrError: + def extract(self) -> types.VideoMetadata: exif = exiftool_read_video.ExifToolReadVideo(ET.ElementTree(self.element)) make = exif.extract_make() diff --git a/mapillary_tools/geotag/video_extractors/gpx.py b/mapillary_tools/geotag/video_extractors/gpx.py index 560fa4294..6d08f5b5b 100644 --- a/mapillary_tools/geotag/video_extractors/gpx.py +++ b/mapillary_tools/geotag/video_extractors/gpx.py @@ -1,7 +1,7 @@ from __future__ import annotations import dataclasses -import datetime +import enum import logging import sys import typing as T @@ -12,7 +12,7 @@ else: from typing_extensions import override -from ... import geo, telemetry, types +from ... import exceptions, geo, telemetry, types from ..utils import parse_gpx from .base import BaseVideoExtractor from .native import NativeVideoExtractor @@ -21,106 +21,96 @@ LOG = logging.getLogger(__name__) +class SyncMode(enum.Enum): + # Sync by video GPS timestamps if found, otherwise rebase + SYNC = "sync" + # Sync by video GPS timestamps, and throw if not found + STRICT_SYNC = "strict_sync" + # Rebase all GPX timestamps to start from 0 + REBASE = "rebase" + + class GPXVideoExtractor(BaseVideoExtractor): - def __init__(self, video_path: Path, gpx_path: Path): + def __init__( + self, video_path: Path, gpx_path: Path, sync_mode: SyncMode = SyncMode.SYNC + ): self.video_path = video_path self.gpx_path = gpx_path + self.sync_mode = sync_mode @override - def extract(self) -> types.VideoMetadataOrError: - try: - gpx_tracks = parse_gpx(self.gpx_path) - except Exception as ex: - raise RuntimeError( - f"Error parsing GPX {self.gpx_path}: {ex.__class__.__name__}: {ex}" - ) + def extract(self) -> types.VideoMetadata: + gpx_tracks = parse_gpx(self.gpx_path) if 1 < len(gpx_tracks): LOG.warning( - "Found %s tracks in the GPX file %s. 
Will merge points in all the tracks as a single track for interpolation", - len(gpx_tracks), - self.gpx_path, + f"Found {len(gpx_tracks)} tracks in the GPX file {self.gpx_path}. Will merge points in all the tracks as a single track for interpolation" ) gpx_points: T.Sequence[geo.Point] = sum(gpx_tracks, []) native_extractor = NativeVideoExtractor(self.video_path) - video_metadata_or_error = native_extractor.extract() - - if isinstance(video_metadata_or_error, types.ErrorMetadata): + try: + native_video_metadata = native_extractor.extract() + except exceptions.MapillaryVideoGPSNotFoundError as ex: + if self.sync_mode is SyncMode.STRICT_SYNC: + raise ex self._rebase_times(gpx_points) return types.VideoMetadata( - filename=video_metadata_or_error.filename, - filetype=video_metadata_or_error.filetype or types.FileType.VIDEO, + filename=self.video_path, + filetype=types.FileType.VIDEO, points=gpx_points, ) - video_metadata = video_metadata_or_error - - offset = self._synx_gpx_by_first_gps_timestamp( - gpx_points, video_metadata.points - ) - - self._rebase_times(gpx_points, offset=offset) + if self.sync_mode is SyncMode.REBASE: + self._rebase_times(gpx_points) + else: + offset = self._gpx_offset(gpx_points, native_video_metadata.points) + self._rebase_times(gpx_points, offset=offset) - return dataclasses.replace(video_metadata_or_error, points=gpx_points) + return dataclasses.replace(native_video_metadata, points=gpx_points) - @staticmethod - def _rebase_times(points: T.Sequence[geo.Point], offset: float = 0.0): + @classmethod + def _rebase_times(cls, points: T.Sequence[geo.Point], offset: float = 0.0) -> None: """ - Make point times start from 0 + Rebase point times to start from **offset** """ if points: first_timestamp = points[0].time for p in points: p.time = (p.time - first_timestamp) + offset - return points - def _synx_gpx_by_first_gps_timestamp( - self, gpx_points: T.Sequence[geo.Point], video_gps_points: T.Sequence[geo.Point] + @classmethod + def _gpx_offset( + cls, gpx_points: T.Sequence[geo.Point], video_gps_points: T.Sequence[geo.Point] ) -> float: + """ + Calculate the offset that needs to be applied to the GPX points to sync with the video GPS points. 
+ + >>> gpx_points = [geo.Point(time=5, lat=1, lon=1, alt=None, angle=None)] + >>> GPXVideoExtractor._gpx_offset(gpx_points, gpx_points) + 0.0 + >>> GPXVideoExtractor._gpx_offset(gpx_points, []) + 0.0 + >>> GPXVideoExtractor._gpx_offset([], gpx_points) + 0.0 + """ offset: float = 0.0 - if not gpx_points: - return offset - - first_gpx_dt = datetime.datetime.fromtimestamp( - gpx_points[0].time, tz=datetime.timezone.utc - ) - LOG.info("First GPX timestamp: %s", first_gpx_dt) - - if not video_gps_points: - LOG.warning( - "Skip GPX synchronization because no GPS found in video %s", - self.video_path, - ) + if not gpx_points or not video_gps_points: return offset - first_gps_point = video_gps_points[0] - if isinstance(first_gps_point, telemetry.GPSPoint): - if first_gps_point.epoch_time is not None: - first_gps_dt = datetime.datetime.fromtimestamp( - first_gps_point.epoch_time, tz=datetime.timezone.utc - ) - LOG.info("First GPS timestamp: %s", first_gps_dt) - offset = gpx_points[0].time - first_gps_point.epoch_time - if offset: - LOG.warning( - "Found offset between GPX %s and video GPS timestamps %s: %s seconds", - first_gpx_dt, - first_gps_dt, - offset, - ) - else: - LOG.info( - "GPX and GPS are perfectly synchronized (all starts from %s)", - first_gpx_dt, - ) - else: - LOG.warning( - "Skip GPX synchronization because no GPS epoch time found in video %s", - self.video_path, - ) + gps_epoch_time: float | None = None + gps_point = video_gps_points[0] + if isinstance(gps_point, telemetry.GPSPoint): + if gps_point.epoch_time is not None: + gps_epoch_time = gps_point.epoch_time + elif isinstance(gps_point, telemetry.CAMMGPSPoint): + if gps_point.time_gps_epoch is not None: + gps_epoch_time = gps_point.time_gps_epoch + + if gps_epoch_time is not None: + offset = gpx_points[0].time - gps_epoch_time return offset diff --git a/mapillary_tools/geotag/video_extractors/native.py b/mapillary_tools/geotag/video_extractors/native.py index b30d3160e..ca434ced3 100644 --- a/mapillary_tools/geotag/video_extractors/native.py +++ b/mapillary_tools/geotag/video_extractors/native.py @@ -17,7 +17,7 @@ class GoProVideoExtractor(BaseVideoExtractor): @override - def extract(self) -> types.VideoMetadataOrError: + def extract(self) -> types.VideoMetadata: with self.video_path.open("rb") as fp: gopro_info = gpmf_parser.extract_gopro_info(fp) @@ -29,23 +29,13 @@ def extract(self) -> types.VideoMetadataOrError: gps_points = gopro_info.gps assert gps_points is not None, "must have GPS data extracted" if not gps_points: - # Instead of raising an exception, return error metadata to tell the file type - ex: exceptions.MapillaryDescriptionError = ( - exceptions.MapillaryGPXEmptyError("Empty GPS data found") - ) - return types.describe_error_metadata( - ex, self.video_path, filetype=types.FileType.GOPRO - ) + raise exceptions.MapillaryGPXEmptyError("Empty GPS data found") gps_points = T.cast( T.List[telemetry.GPSPoint], gpmf_gps_filter.remove_noisy_points(gps_points) ) if not gps_points: - # Instead of raising an exception, return error metadata to tell the file type - ex = exceptions.MapillaryGPSNoiseError("GPS is too noisy") - return types.describe_error_metadata( - ex, self.video_path, filetype=types.FileType.GOPRO - ) + raise exceptions.MapillaryGPSNoiseError("GPS is too noisy") video_metadata = types.VideoMetadata( filename=self.video_path, @@ -61,7 +51,7 @@ def extract(self) -> types.VideoMetadataOrError: class CAMMVideoExtractor(BaseVideoExtractor): @override - def extract(self) -> types.VideoMetadataOrError: + def 
extract(self) -> types.VideoMetadata: with self.video_path.open("rb") as fp: camm_info = camm_parser.extract_camm_info(fp) @@ -71,13 +61,7 @@ def extract(self) -> types.VideoMetadataOrError: ) if not camm_info.gps and not camm_info.mini_gps: - # Instead of raising an exception, return error metadata to tell the file type - ex: exceptions.MapillaryDescriptionError = ( - exceptions.MapillaryGPXEmptyError("Empty GPS data found") - ) - return types.describe_error_metadata( - ex, self.video_path, filetype=types.FileType.CAMM - ) + raise exceptions.MapillaryGPXEmptyError("Empty GPS data found") return types.VideoMetadata( filename=self.video_path, @@ -91,7 +75,7 @@ def extract(self) -> types.VideoMetadataOrError: class BlackVueVideoExtractor(BaseVideoExtractor): @override - def extract(self) -> types.VideoMetadataOrError: + def extract(self) -> types.VideoMetadata: with self.video_path.open("rb") as fp: blackvue_info = blackvue_parser.extract_blackvue_info(fp) @@ -101,19 +85,13 @@ def extract(self) -> types.VideoMetadataOrError: ) if not blackvue_info.gps: - # Instead of raising an exception, return error metadata to tell the file type - ex: exceptions.MapillaryDescriptionError = ( - exceptions.MapillaryGPXEmptyError("Empty GPS data found") - ) - return types.describe_error_metadata( - ex, self.video_path, filetype=types.FileType.BLACKVUE - ) + raise exceptions.MapillaryGPXEmptyError("Empty GPS data found") video_metadata = types.VideoMetadata( filename=self.video_path, filesize=utils.get_file_size(self.video_path), filetype=types.FileType.BLACKVUE, - points=blackvue_info.gps or [], + points=blackvue_info.gps, make=blackvue_info.make, model=blackvue_info.model, ) @@ -127,7 +105,7 @@ def __init__(self, video_path: Path, filetypes: set[types.FileType] | None = Non self.filetypes = filetypes @override - def extract(self) -> types.VideoMetadataOrError: + def extract(self) -> types.VideoMetadata: ft = self.filetypes extractor: BaseVideoExtractor diff --git a/mapillary_tools/upload.py b/mapillary_tools/upload.py index 014e28bd8..99ba0ae49 100644 --- a/mapillary_tools/upload.py +++ b/mapillary_tools/upload.py @@ -1,6 +1,5 @@ from __future__ import annotations -import contextlib import json import logging import os @@ -671,18 +670,27 @@ def _load_valid_metadatas_from_desc_path( if desc_path is None: desc_path = _find_desc_path(import_paths) - with ( - contextlib.nullcontext(sys.stdin.buffer) - if desc_path == "-" - else open(desc_path, "rb") - ) as fp: + if desc_path == "-": try: - metadatas = DescriptionJSONSerializer.deserialize_stream(fp) + metadatas = DescriptionJSONSerializer.deserialize_stream(sys.stdin.buffer) except json.JSONDecodeError as ex: raise exceptions.MapillaryInvalidDescriptionFile( f"Invalid JSON stream from {desc_path}: {ex}" ) from ex + else: + if not os.path.isfile(desc_path): + raise exceptions.MapillaryFileNotFoundError( + f"Description file not found: {desc_path}" + ) + with open(desc_path, "rb") as fp: + try: + metadatas = DescriptionJSONSerializer.deserialize_stream(fp) + except json.JSONDecodeError as ex: + raise exceptions.MapillaryInvalidDescriptionFile( + f"Invalid JSON stream from {desc_path}: {ex}" + ) from ex + return metadatas diff --git a/mapillary_tools/uploader.py b/mapillary_tools/uploader.py index 55b468c85..388fcd3fa 100644 --- a/mapillary_tools/uploader.py +++ b/mapillary_tools/uploader.py @@ -378,6 +378,9 @@ def _upload_sequence( if progress is None: progress = {} + # FIXME: This is a hack to disable the event emitter inside the uploader + 
uploader_without_emitter = uploader.copy_uploader_without_emitter() + lock = threading.Lock() def _upload_image(image_metadata: types.ImageMetadata) -> str: @@ -387,7 +390,7 @@ def _upload_image(image_metadata: types.ImageMetadata) -> str: } bytes = cls._dump_image_bytes(image_metadata) - file_handle = uploader.upload_stream( + file_handle = uploader_without_emitter.upload_stream( io.BytesIO(bytes), progress=mutable_progress ) @@ -404,9 +407,6 @@ def _upload_image(image_metadata: types.ImageMetadata) -> str: # TODO: assert sequence is sorted - # FIXME: This is a hack to disable the event emitter inside the uploader - uploader.emittion_disabled = True - uploader.emitter.emit("upload_start", progress) with concurrent.futures.ThreadPoolExecutor( @@ -423,15 +423,12 @@ def _upload_image(image_metadata: types.ImageMetadata) -> str: with io.BytesIO() as manifest_fp: manifest_fp.write(json.dumps(manifest).encode("utf-8")) manifest_fp.seek(0, io.SEEK_SET) - manifest_file_handle = uploader.upload_stream( + manifest_file_handle = uploader_without_emitter.upload_stream( manifest_fp, session_key=f"{uuid.uuid4().hex}.json" ) uploader.emitter.emit("upload_end", progress) - # FIXME: This is a hack to disable the event emitter inside the uploader - uploader.emittion_disabled = False - cluster_id = uploader.finish_upload( manifest_file_handle, api_v4.ClusterFileType.MLY_BUNDLE_MANIFEST, @@ -485,7 +482,6 @@ def __init__( dry_run=False, ): self.user_items = user_items - self.emittion_disabled = False if emitter is None: # An empty event emitter that does nothing self.emitter = EventEmitter() @@ -522,7 +518,7 @@ def upload_stream( progress["retries"] = 0 progress["begin_offset"] = None - self._maybe_emit("upload_start", progress) + self.emitter.emit("upload_start", progress) while True: try: @@ -536,10 +532,48 @@ def upload_stream( progress["retries"] += 1 - self._maybe_emit("upload_end", progress) + self.emitter.emit("upload_end", progress) return file_handle + def finish_upload( + self, + file_handle: str, + cluster_filetype: api_v4.ClusterFileType, + progress: dict[str, T.Any] | None = None, + ) -> str: + """Finish upload with safe retries guaranteed""" + if progress is None: + progress = {} + + if self.dry_run: + cluster_id = "0" + else: + resp = api_v4.finish_upload( + self.user_items["user_upload_token"], + file_handle, + cluster_filetype, + organization_id=self.user_items.get("MAPOrganizationKey"), + ) + + data = resp.json() + cluster_id = data.get("cluster_id") + + # TODO: validate cluster_id + + progress["cluster_id"] = cluster_id + self.emitter.emit("upload_finished", progress) + + return cluster_id + + def copy_uploader_without_emitter(self) -> Uploader: + return Uploader( + self.user_items, + emitter=None, + chunk_size=self.chunk_size, + dry_run=self.dry_run, + ) + def _create_upload_service(self, session_key: str) -> upload_api_v4.UploadService: upload_service: upload_api_v4.UploadService @@ -570,7 +604,7 @@ def _handle_upload_exception( chunk_size = progress["chunk_size"] if retries <= constants.MAX_UPLOAD_RETRIES and _is_retriable_exception(ex): - self._maybe_emit("upload_interrupted", progress) + self.emitter.emit("upload_interrupted", progress) LOG.warning( # use %s instead of %d because offset could be None "Error uploading chunk_size %d at begin_offset %s: %s: %s", @@ -611,7 +645,7 @@ def _chunk_with_progress_emitted( # Whenever a chunk is uploaded, reset retries progress["retries"] = 0 - self._maybe_emit("upload_progress", progress) + self.emitter.emit("upload_progress", progress) def
_upload_stream_retryable( self, @@ -626,7 +660,7 @@ def _upload_stream_retryable( progress["begin_offset"] = begin_offset progress["offset"] = begin_offset - self._maybe_emit("upload_fetch_offset", progress) + self.emitter.emit("upload_fetch_offset", progress) fp.seek(begin_offset, io.SEEK_SET) @@ -634,42 +668,6 @@ def _upload_stream_retryable( return upload_service.upload_shifted_chunks(shifted_chunks, begin_offset) - def finish_upload( - self, - file_handle: str, - cluster_filetype: api_v4.ClusterFileType, - progress: dict[str, T.Any] | None = None, - ) -> str: - """Finish upload with safe retries guraranteed""" - if progress is None: - progress = {} - - if self.dry_run: - cluster_id = "0" - else: - resp = api_v4.finish_upload( - self.user_items["user_upload_token"], - file_handle, - cluster_filetype, - organization_id=self.user_items.get("MAPOrganizationKey"), - ) - - data = resp.json() - cluster_id = data.get("cluster_id") - - # TODO: validate cluster_id - - progress["cluster_id"] = cluster_id - self._maybe_emit("upload_finished", progress) - - return cluster_id - - def _maybe_emit( - self, event: EventName, progress: dict[str, T.Any] | UploaderProgress - ): - if not self.emittion_disabled: - return self.emitter.emit(event, progress) - def _validate_metadatas(metadatas: T.Sequence[types.ImageMetadata]): for metadata in metadatas: diff --git a/tests/integration/fixtures.py b/tests/integration/fixtures.py index 19e6fc2b2..5c86f9554 100644 --- a/tests/integration/fixtures.py +++ b/tests/integration/fixtures.py @@ -2,8 +2,10 @@ import json import os +import shlex import shutil import subprocess +import sys import tempfile import zipfile from pathlib import Path @@ -32,11 +34,13 @@ def setup_config(tmpdir: py.path.local): os.environ["MAPILLARY_CONFIG_PATH"] = str(config_path) os.environ["MAPILLARY_TOOLS_PROMPT_DISABLED"] = "YES" os.environ["MAPILLARY_TOOLS__AUTH_VERIFICATION_DISABLED"] = "YES" - x = subprocess.run( - f"{EXECUTABLE} authenticate --user_name {USERNAME} --jwt test_user_token", - shell=True, + run_command( + [ + *["--user_name", USERNAME], + *["--jwt", "test_user_token"], + ], + command="authenticate", ) - assert x.returncode == 0, x.stderr yield config_path if tmpdir.check(): tmpdir.remove(ignore_errors=True) @@ -82,12 +86,14 @@ def _ffmpeg_installed(): [ffmpeg_path, "-version"], stderr=subprocess.PIPE, stdout=subprocess.PIPE, + check=True, ) # In Windows, ffmpeg is installed but ffprobe is not? subprocess.run( [ffprobe_path, "-version"], stderr=subprocess.PIPE, stdout=subprocess.PIPE, + check=True, ) except FileNotFoundError: return False @@ -103,7 +109,7 @@ def _exiftool_installed(): [EXIFTOOL_EXECUTABLE, "-ver"], stderr=subprocess.PIPE, stdout=subprocess.PIPE, - shell=True, + check=True, ) except FileNotFoundError: return False @@ -113,7 +119,9 @@ def _exiftool_installed(): IS_EXIFTOOL_INSTALLED = _exiftool_installed() -def run_exiftool(setup_data: py.path.local) -> py.path.local: +def run_exiftool_dir(setup_data: py.path.local) -> py.path.local: + pytest_skip_if_not_exiftool_installed() + exiftool_outuput_dir = setup_data.join("exiftool_outuput_dir") # The "-w %c" option in exiftool will produce duplicated XML files therefore clean up the folder first shutil.rmtree(exiftool_outuput_dir, ignore_errors=True) @@ -140,27 +148,37 @@ def run_exiftool(setup_data: py.path.local) -> py.path.local: # The solution is to use %c which suffixes the original filenames with an increasing number # -w C%c.txt # C.txt, C1.txt, C2.txt ... # -w C%.c.txt # C0.txt, C1.txt, C2.txt ... 
- subprocess.check_call( - f"{EXIFTOOL_EXECUTABLE} -r -ee -n -X -api LargeFileSupport=1 -w! {exiftool_outuput_dir}/%f%c.xml {setup_data}", - shell=True, + # TODO: Maybe replace with exiftool_runner + subprocess.run( + [ + EXIFTOOL_EXECUTABLE, + "-fast", # Fast processing + "-q", # Quiet mode + "-r", # Recursive + "-n", # Disable print conversion + "-X", # XML output + "-ee", + *["-api", "LargeFileSupport=1"], + *["-w!", str(exiftool_outuput_dir.join("%f%c.xml"))], + str(setup_data), + ], + check=True, ) return exiftool_outuput_dir def run_exiftool_and_generate_geotag_args( - test_data_dir: py.path.local, run_args: str -) -> str: - if not IS_EXIFTOOL_INSTALLED: - pytest.skip("exiftool not installed") - exiftool_outuput_dir = run_exiftool(test_data_dir) - exiftool_params = ( - f"--geotag_source exiftool_xml --geotag_source_path {exiftool_outuput_dir}" - ) - return f"{run_args} {exiftool_params}" + test_data_dir: py.path.local, run_args: list[str] +) -> list[str]: + pytest_skip_if_not_exiftool_installed() - -with open("schema/image_description_schema.json") as fp: - IMAGE_DESCRIPTION_SCHEMA = json.load(fp) + exiftool_outuput_dir = run_exiftool_dir(test_data_dir) + return [ + *run_args, + "--geotag_source=exiftool_xml", + "--geotag_source_path", + str(exiftool_outuput_dir), + ] def validate_and_extract_image(image_path: Path): @@ -207,27 +225,9 @@ def validate_and_extract_camm(video_path: Path) -> list[dict]: upload_md5sum, ) - if not IS_FFMPEG_INSTALLED: - return [] - - with tempfile.TemporaryDirectory() as tempdir: - x = subprocess.run( - f"{EXECUTABLE} --verbose video_process --video_sample_interval=2 --video_sample_distance=-1 --geotag_source=camm {str(video_path)} {tempdir}", - shell=True, - ) - assert x.returncode == 0, x.stderr - - # no exif written so we can't extract the image description - # descs = [] - # for root, _, files in os.walk(tempdir): - # for file in files: - # if file.endswith(".jpg"): - # descs.append(validate_and_extract_image(os.path.join(root, file))) - # return descs - - # instead, we return the mapillary_image_description.json - with open(os.path.join(tempdir, "mapillary_image_description.json")) as fp: - return json.load(fp) + return run_process_for_descs( + ["--file_types=camm", str(video_path)], command="process" + ) def load_descs(descs) -> list: @@ -335,6 +335,11 @@ def assert_compare_image_descs(expected: dict, actual: dict): if "MAPDeviceModel" in expected: assert expected["MAPDeviceModel"] == actual["MAPDeviceModel"] + if "MAPGPSTrack" in expected: + assert expected["MAPGPSTrack"] == actual["MAPGPSTrack"], ( + f"expect {expected['MAPGPSTrack']} but got {actual['MAPGPSTrack']} in {filename}" + ) + def assert_contains_image_descs(haystack: Path | list[dict], needle: Path | list[dict]): """ @@ -362,3 +367,87 @@ def assert_contains_image_descs(haystack: Path | list[dict], needle: Path | list def assert_same_image_descs(left: Path | list[dict], right: Path | list[dict]): assert_contains_image_descs(left, right) assert_contains_image_descs(right, left) + + +def assert_descs_exact_equal(left: list[dict], right: list[dict]): + assert len(left) == len(right) + + # TODO: make sure groups are the same too + for d in left: + d.pop("MAPSequenceUUID", None) + + for d in right: + d.pop("MAPSequenceUUID", None) + + left.sort(key=lambda d: d["filename"]) + right.sort(key=lambda d: d["filename"]) + + assert left == right + + +def run_command(params: list[str], command: str, **kwargs): + subprocess.run([*shlex.split(EXECUTABLE), command, *params], check=True, **kwargs) + + 
+def run_process_for_descs(params: list[str], command: str = "process", **kwargs): + # Make windows happy with delete=False + # https://github.com/mapillary/mapillary_tools/issues/503 + if sys.platform in ["win32"]: + delete = False + else: + delete = True + with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as desc_file: + try: + run_command( + [ + "--skip_process_errors", + *["--desc_path", str(desc_file.name)], + *params, + ], + command, + **kwargs, + ) + + with open(desc_file.name, "r") as fp: + fp.seek(0) + descs = json.load(fp) + + if not delete: + desc_file.close() + + finally: + if not delete: + try: + os.remove(desc_file.name) + except FileNotFoundError: + pass + + return descs + + +def run_process_and_upload_for_descs( + params: list[str], command="process_and_upload", **kwargs +): + return run_process_for_descs( + ["--dry_run", *["--user_name", USERNAME], *params], command=command, **kwargs + ) + + +def run_upload(params: list[str], **kwargs): + return run_command( + ["--dry_run", *["--user_name", USERNAME], *params], command="upload", **kwargs + ) + + +def pytest_skip_if_not_ffmpeg_installed(): + if not IS_FFMPEG_INSTALLED: + pytest.skip("ffmpeg is not installed, skipping the test") + + +def pytest_skip_if_not_exiftool_installed(): + if not IS_EXIFTOOL_INSTALLED: + pytest.skip("exiftool is not installed, skipping the test") + + +with open("schema/image_description_schema.json") as fp: + IMAGE_DESCRIPTION_SCHEMA = json.load(fp) diff --git a/tests/integration/test_gopro.py b/tests/integration/test_gopro.py index a7609717f..9ea014a4e 100644 --- a/tests/integration/test_gopro.py +++ b/tests/integration/test_gopro.py @@ -1,21 +1,23 @@ import copy +import functools import os -import subprocess import typing as T -from pathlib import Path import py.path import pytest from .fixtures import ( assert_same_image_descs, - EXECUTABLE, - IS_FFMPEG_INSTALLED, + pytest_skip_if_not_ffmpeg_installed, run_exiftool_and_generate_geotag_args, + run_process_for_descs, setup_config, setup_upload, ) +run_video_process_for_descs = functools.partial( + run_process_for_descs, command="video_process" +) IMPORT_PATH = "tests/data/gopro_data" TEST_ENVS = { @@ -134,25 +136,34 @@ def test_process_gopro_hero8( setup_data: py.path.local, use_exiftool: bool = False, ): - if not IS_FFMPEG_INSTALLED: - pytest.skip("skip because ffmpeg not installed") + pytest_skip_if_not_ffmpeg_installed() + video_path = setup_data.join("hero8.mp4") + if use_exiftool: - args = f"{EXECUTABLE} --verbose video_process --video_sample_interval=2 --video_sample_distance=-1 {str(video_path)}" + args = [ + "--video_sample_interval=2", + "--video_sample_distance=-1", + str(video_path), + ] args = run_exiftool_and_generate_geotag_args(setup_data, args) else: - args = f"{EXECUTABLE} --verbose video_process --video_sample_interval=2 --video_sample_distance=-1 --geotag_source=gopro_videos {str(video_path)}" + args = [ + "--video_sample_interval=2", + "--video_sample_distance=-1", + "--geotag_source=gopro_videos", + str(video_path), + ] + env = os.environ.copy() env.update(TEST_ENVS) - x = subprocess.run(args, shell=True, env=env) - assert x.returncode == 0, x.stderr + descs = run_video_process_for_descs(args, env=env) sample_dir = setup_data.join("mapillary_sampled_video_frames") - desc_path = sample_dir.join("mapillary_image_description.json") expected_descs = copy.deepcopy(EXPECTED_DESCS) for expected_desc in expected_descs: expected_desc["filename"] = str(sample_dir.join(expected_desc["filename"])) - 
assert_same_image_descs(Path(desc_path), expected_descs) + assert_same_image_descs(descs, expected_descs) @pytest.mark.usefixtures("setup_config") diff --git a/tests/integration/test_history.py b/tests/integration/test_history.py index 62cd8d6ff..b6510eca9 100644 --- a/tests/integration/test_history.py +++ b/tests/integration/test_history.py @@ -1,52 +1,37 @@ -import subprocess - import py.path import pytest -from .fixtures import EXECUTABLE, setup_config, setup_data, setup_upload, USERNAME - -UPLOAD_FLAGS = f"--dry_run --user_name={USERNAME}" +from .fixtures import ( + run_process_and_upload_for_descs, + setup_config, + setup_data, + setup_upload, +) @pytest.mark.usefixtures("setup_config") -def test_upload_images( - setup_data: py.path.local, - setup_upload: py.path.local, -): +def test_upload_everything(setup_data: py.path.local, setup_upload: py.path.local): assert len(setup_upload.listdir()) == 0 - x = subprocess.run( - f"{EXECUTABLE} process_and_upload --file_types=image {UPLOAD_FLAGS} {str(setup_data)}", - shell=True, - ) - assert x.returncode == 0, x.stderr + run_process_and_upload_for_descs([str(setup_data)]) + assert 0 < len(setup_upload.listdir()), "should be uploaded for the first time" for upload in setup_upload.listdir(): upload.remove() - x = subprocess.run( - f"{EXECUTABLE} process_and_upload --file_types=image {UPLOAD_FLAGS} {str(setup_data)}", - shell=True, - ) - assert x.returncode == 0, x.stderr + run_process_and_upload_for_descs([str(setup_data)]) + assert len(setup_upload.listdir()) == 0, ( "should NOT upload because it is uploaded already" ) @pytest.mark.usefixtures("setup_config") -def test_upload_gopro( - setup_data: py.path.local, - setup_upload: py.path.local, -): +def test_upload_gopro(setup_data: py.path.local, setup_upload: py.path.local): assert len(setup_upload.listdir()) == 0 video_dir = setup_data.join("gopro_data") - x = subprocess.run( - f"{EXECUTABLE} process_and_upload --skip_process_errors {UPLOAD_FLAGS} {str(video_dir)}", - shell=True, - ) - assert x.returncode == 0, x.stderr + run_process_and_upload_for_descs([str(video_dir)]) assert len(setup_upload.listdir()) == 2, ( f"should be uploaded for the first time but got {setup_upload.listdir()}" ) @@ -55,11 +40,7 @@ def test_upload_gopro( upload.remove() assert len(setup_upload.listdir()) == 1 - x = subprocess.run( - f"{EXECUTABLE} process_and_upload --skip_process_errors {UPLOAD_FLAGS} {str(video_dir)}", - shell=True, - ) - assert x.returncode == 0, x.stderr + run_process_and_upload_for_descs([str(video_dir)]) assert len(setup_upload.listdir()) == 1, ( "should NOT upload because it is uploaded already" ) diff --git a/tests/integration/test_process.py b/tests/integration/test_process.py index a3afe06f2..59335bd21 100644 --- a/tests/integration/test_process.py +++ b/tests/integration/test_process.py @@ -1,26 +1,26 @@ import datetime import json +import os import subprocess -import tempfile from pathlib import Path -import exifread import py.path import pytest from .fixtures import ( assert_contains_image_descs, + assert_descs_exact_equal, EXECUTABLE, - IS_FFMPEG_INSTALLED, + pytest_skip_if_not_exiftool_installed, + run_command, run_exiftool_and_generate_geotag_args, - setup_config, + run_exiftool_dir, + run_process_for_descs, setup_data, validate_and_extract_zip, ) -PROCESS_FLAGS = "" - _DEFAULT_EXPECTED_DESCS = { "DSC00001.JPG": { "filename": "DSC00001.JPG", @@ -90,17 +90,16 @@ def test_basic(): def test_process_images_with_defaults( - setup_data: py.path.local, - use_exiftool: bool = False, + 
setup_data: py.path.local, use_exiftool: bool = False ): - args = f"{EXECUTABLE} process --file_types=image {PROCESS_FLAGS} {setup_data}" + args = ["--file_types=image", str(setup_data)] if use_exiftool: args = run_exiftool_and_generate_geotag_args(setup_data, args) - x = subprocess.run(args, shell=True) - assert x.returncode == 0, x.stderr + descs = run_process_for_descs(args) + assert_contains_image_descs( - Path(setup_data, "mapillary_image_description.json"), + descs, [ { **_DEFAULT_EXPECTED_DESCS["DSC00001.JPG"], @@ -121,13 +120,15 @@ def test_process_images_with_defaults_with_exiftool(setup_data: py.path.local): def test_time_with_offset(setup_data: py.path.local, use_exiftool: bool = False): - args = f"{EXECUTABLE} process --file_types=image {PROCESS_FLAGS} {setup_data} --offset_time=2.5" + args = ["--file_types=image", "--offset_time=2.5", str(setup_data)] + if use_exiftool: args = run_exiftool_and_generate_geotag_args(setup_data, args) - x = subprocess.run(args, shell=True) - assert x.returncode == 0, x.stderr + + descs = run_process_for_descs(args) + assert_contains_image_descs( - Path(setup_data, "mapillary_image_description.json"), + descs, [ { **_DEFAULT_EXPECTED_DESCS["DSC00001.JPG"], @@ -147,13 +148,13 @@ def test_time_with_offset(setup_data: py.path.local, use_exiftool: bool = False) ], ) - args = f"{EXECUTABLE} process --file_types=image {PROCESS_FLAGS} {setup_data} --offset_time=-1.0" + args = ["--file_types=image", str(setup_data), "--offset_time=-1.0"] if use_exiftool: args = run_exiftool_and_generate_geotag_args(setup_data, args) - x = subprocess.run(args, shell=True) - assert x.returncode == 0, x.stderr + + descs = run_process_for_descs(args) assert_contains_image_descs( - Path(setup_data, "mapillary_image_description.json"), + descs, [ { **_DEFAULT_EXPECTED_DESCS["DSC00001.JPG"], @@ -181,11 +182,17 @@ def test_time_with_offset_with_exiftool(setup_data: py.path.local): def test_process_images_with_overwrite_all_EXIF_tags( setup_data: py.path.local, use_exiftool: bool = False ): - args = f"{EXECUTABLE} process --file_types=image --overwrite_all_EXIF_tags --offset_time=2.5 {PROCESS_FLAGS} {setup_data}" + args = [ + "--file_types=image", + "--overwrite_all_EXIF_tags", + "--offset_time=2.5", + str(setup_data), + ] if use_exiftool: args = run_exiftool_and_generate_geotag_args(setup_data, args) - x = subprocess.run(args, shell=True) - assert x.returncode == 0, x.stderr + + actual_descs = run_process_for_descs(args) + expected_descs = [ { **_DEFAULT_EXPECTED_DESCS["DSC00001.JPG"], # type: ignore @@ -204,19 +211,15 @@ def test_process_images_with_overwrite_all_EXIF_tags( }, ] assert_contains_image_descs( - Path(setup_data, "mapillary_image_description.json"), + actual_descs, expected_descs, ) - args = f"{EXECUTABLE} process --file_types=image {PROCESS_FLAGS} {setup_data}" + args = ["--file_types=image", str(setup_data)] if use_exiftool: args = run_exiftool_and_generate_geotag_args(setup_data, args) - x = subprocess.run(args, shell=True) - assert x.returncode == 0, x.stderr - assert_contains_image_descs( - Path(setup_data, "mapillary_image_description.json"), - expected_descs, - ) + descs = run_process_for_descs(args) + assert_contains_image_descs(descs, expected_descs) def test_process_images_with_overwrite_all_EXIF_tags_with_exiftool( @@ -228,14 +231,15 @@ def test_process_images_with_overwrite_all_EXIF_tags_with_exiftool( def test_angle_with_offset(setup_data: py.path.local, use_exiftool: bool = False): - args = f"{EXECUTABLE} process --file_types=image {PROCESS_FLAGS} 
{setup_data} --offset_angle=2.5" + args = ["--file_types=image", str(setup_data), "--offset_angle=2.5"] + if use_exiftool: args = run_exiftool_and_generate_geotag_args(setup_data, args) - x = subprocess.run(args, shell=True) - assert x.returncode == 0, x.stderr + + descs = run_process_for_descs(args) assert_contains_image_descs( - Path(setup_data, "mapillary_image_description.json"), + descs, [ { **_DEFAULT_EXPECTED_DESCS["DSC00001.JPG"], @@ -271,10 +275,10 @@ def test_angle_with_offset_with_exiftool(setup_data: py.path.local): def test_parse_adobe_coordinates(setup_data: py.path.local): - args = f"{EXECUTABLE} process --file_types=image {PROCESS_FLAGS} {setup_data}/adobe_coords" - x = subprocess.run(args, shell=True) + args = ["--file_types=image", str(setup_data.join("adobe_coords"))] + descs = run_process_for_descs(args) assert_contains_image_descs( - Path(setup_data, "adobe_coords/mapillary_image_description.json"), + descs, [ { "filename": str(Path(setup_data, "adobe_coords", "adobe_coords.jpg")), @@ -291,24 +295,24 @@ def test_parse_adobe_coordinates(setup_data: py.path.local): ) -def test_zip(tmpdir: py.path.local, setup_data: py.path.local): +def test_zip_ok(tmpdir: py.path.local, setup_data: py.path.local): + # Generate description file in the setup_data directory + run_command(["--file_types=image", str(setup_data)], command="process") + zip_dir = tmpdir.mkdir("zip_dir") - x = subprocess.run( - f"{EXECUTABLE} process --file_types=image {PROCESS_FLAGS} {setup_data}", - shell=True, - ) - assert x.returncode == 0, x.stderr - x = subprocess.run( - f"{EXECUTABLE} zip {setup_data} {zip_dir}", - shell=True, - ) - assert x.returncode == 0, x.stderr + run_command([str(setup_data), str(zip_dir)], command="zip") assert 0 < len(zip_dir.listdir()) for file in zip_dir.listdir(): validate_and_extract_zip(Path(file)) -@pytest.mark.usefixtures("setup_config") +def test_zip_desc_not_found(tmpdir: py.path.local, setup_data: py.path.local): + zip_dir = tmpdir.mkdir("zip_dir") + with pytest.raises(subprocess.CalledProcessError) as exc_info: + run_command([str(setup_data), str(zip_dir)], command="zip") + assert exc_info.value.returncode == 3 + + def test_process_boolean_options(setup_data: py.path.local): boolean_options = [ "--interpolate_directions", @@ -320,17 +324,20 @@ def test_process_boolean_options(setup_data: py.path.local): "--skip_subfolders", ] for option in boolean_options: - x = subprocess.run( - f"{EXECUTABLE} process --file_types=image {PROCESS_FLAGS} {option} {setup_data}", - shell=True, + run_process_for_descs( + [ + "--file_types=image", + option, + str(setup_data), + ] ) - assert x.returncode == 0, x.stderr - all_options = " ".join(boolean_options) - x = subprocess.run( - f"{EXECUTABLE} process --file_types=image {PROCESS_FLAGS} {all_options} {setup_data}", - shell=True, + run_process_for_descs( + [ + "--file_types=image", + *boolean_options, + str(setup_data), + ] ) - assert x.returncode == 0, x.stderr GPX_CONTENT = """ @@ -377,18 +384,18 @@ def test_geotagging_images_from_gpx(setup_data: py.path.local): fp.write(GPX_CONTENT) images = setup_data.join("images") - x = subprocess.run( - f"""{EXECUTABLE} process {PROCESS_FLAGS} \ - --file_types=image \ - --geotag_source=gpx \ - --geotag_source_path={gpx_file} \ - --skip_process_errors \ - {images} -""", - shell=True, + descs = run_process_for_descs( + [ + "--file_types=image", + "--geotag_source=gpx", + "--geotag_source_path", + str(gpx_file), + str(images), + ] ) + assert_contains_image_descs( - Path(images, 
"mapillary_image_description.json"), + descs, [ { **_DEFAULT_EXPECTED_DESCS["DSC00001.JPG"], @@ -421,13 +428,18 @@ def test_geotagging_images_from_gpx_with_offset(setup_data: py.path.local): with gpx_file.open("w") as fp: fp.write(GPX_CONTENT) - x = subprocess.run( - f"{EXECUTABLE} --verbose process --file_types=image {PROCESS_FLAGS} {setup_data} --geotag_source gpx --geotag_source_path {gpx_file} --interpolation_offset_time=-20 --skip_process_errors", - shell=True, + descs = run_process_for_descs( + [ + "--file_types=image", + "--geotag_source=gpx", + "--geotag_source_path", + str(gpx_file), + str(setup_data), + "--interpolation_offset_time=-20", + ] ) - assert x.returncode == 0, x.stderr assert_contains_image_descs( - Path(setup_data, "mapillary_image_description.json"), + descs, [ { **_DEFAULT_EXPECTED_DESCS["DSC00001.JPG"], @@ -461,13 +473,18 @@ def test_geotagging_images_from_gpx_use_gpx_start_time(setup_data: py.path.local gpx_file = setup_data.join("test.gpx") with gpx_file.open("w") as fp: fp.write(GPX_CONTENT) - x = subprocess.run( - f"{EXECUTABLE} process --file_types=image {PROCESS_FLAGS} {setup_data} --geotag_source gpx --interpolation_use_gpx_start_time --geotag_source_path {gpx_file} --skip_process_errors", - shell=True, + descs = run_process_for_descs( + [ + "--file_types=image", + *["--geotag_source", "gpx"], + "--interpolation_use_gpx_start_time", + "--geotag_source_path", + str(gpx_file), + str(setup_data), + ] ) - assert x.returncode == 0, x.stderr assert_contains_image_descs( - Path(setup_data, "mapillary_image_description.json"), + descs, [ { **_DEFAULT_EXPECTED_DESCS["DSC00001.JPG"], @@ -503,13 +520,19 @@ def test_geotagging_images_from_gpx_use_gpx_start_time_with_offset( gpx_file = setup_data.join("test.gpx") with gpx_file.open("w") as fp: fp.write(GPX_CONTENT) - x = subprocess.run( - f"{EXECUTABLE} process --file_types=image {PROCESS_FLAGS} {setup_data} --geotag_source gpx --interpolation_use_gpx_start_time --geotag_source_path {gpx_file} --interpolation_offset_time=100 --skip_process_errors", - shell=True, + descs = run_process_for_descs( + [ + "--file_types=image", + str(setup_data), + *["--geotag_source", "gpx"], + "--interpolation_use_gpx_start_time", + "--geotag_source_path", + str(gpx_file), + "--interpolation_offset_time=100", + ] ) - assert x.returncode == 0, x.stderr assert_contains_image_descs( - Path(setup_data, "mapillary_image_description.json"), + descs, [ { **_DEFAULT_EXPECTED_DESCS["DSC00001.JPG"], @@ -541,14 +564,7 @@ def test_geotagging_images_from_gpx_use_gpx_start_time_with_offset( def test_process_filetypes(setup_data: py.path.local): video_dir = setup_data.join("gopro_data") - x = subprocess.run( - f"{EXECUTABLE} --verbose process {PROCESS_FLAGS} --skip_process_errors {video_dir}", - shell=True, - ) - assert x.returncode == 0, x.stderr - desc_path = video_dir.join("mapillary_image_description.json") - with open(desc_path) as fp: - descs = json.load(fp) + descs = run_process_for_descs([str(video_dir)]) assert 2 == len(descs) assert 1 == len(find_desc_errors(descs)) assert 1 == len(filter_out_errors(descs)) @@ -557,262 +573,244 @@ def test_process_filetypes(setup_data: py.path.local): def test_process_unsupported_filetypes(setup_data: py.path.local): video_dir = setup_data.join("gopro_data") for filetypes in ["blackvue"]: - x = subprocess.run( - f"{EXECUTABLE} --verbose process --filetypes={filetypes} --geotag_source=native {PROCESS_FLAGS} --skip_process_errors {video_dir}", - shell=True, + descs = run_process_for_descs( + ["--filetypes", 
filetypes, "--geotag_source=native", str(video_dir)] ) - assert x.returncode == 0, x.stderr - desc_path = video_dir.join("mapillary_image_description.json") - with open(desc_path) as fp: - descs = json.load(fp) assert 2 == len(descs) assert 2 == len(find_desc_errors(descs)) for filetypes in ["image"]: - x = subprocess.run( - f"{EXECUTABLE} --verbose process --filetypes={filetypes} --geotag_source=native {PROCESS_FLAGS} --skip_process_errors {video_dir}", - shell=True, + descs = run_process_for_descs( + ["--filetypes", filetypes, "--geotag_source=native", str(video_dir)] ) - assert x.returncode == 0, x.stderr - desc_path = video_dir.join("mapillary_image_description.json") - with open(desc_path) as fp: - descs = json.load(fp) assert 0 == len(descs) -def test_sample_video_relpath(): - if not IS_FFMPEG_INSTALLED: - pytest.skip("skip because ffmpeg not installed") +def test_process_video_geotag_source_with_gpx_specified(setup_data: py.path.local): + video_path = setup_data.join("videos").join("sample-5s.mp4") + gpx_file = setup_data.join("gpx").join("sf_30km_h.gpx") - with tempfile.TemporaryDirectory() as dir: - x = subprocess.run( - f"{EXECUTABLE} sample_video --rerun tests/data/gopro_data/hero8.mp4 {dir}", - shell=True, - ) - assert x.returncode == 0, x.stderr + descs = run_process_for_descs( + [ + *[ + "--video_geotag_source", + json.dumps({"source": "gpx", "source_path": str(gpx_file)}), + ], + str(video_path), + ] + ) + assert len(descs) == 1 + assert len(descs[0]["MAPGPSTrack"]) > 0 -def test_sample_video_relpath_dir(): - if not IS_FFMPEG_INSTALLED: - pytest.skip("skip because ffmpeg not installed") - with tempfile.TemporaryDirectory() as dir: - x = subprocess.run( - f"{EXECUTABLE} sample_video --rerun --video_start_time 2021_10_10_10_10_10_123 tests/integration {dir}", - shell=True, - ) - assert x.returncode == 0, x.stderr +def test_process_video_geotag_source_gpx_not_found(setup_data: py.path.local): + video_path = setup_data.join("videos").join("sample-5s.mp4") + descs = run_process_for_descs( + [ + *["--video_geotag_source", "gpx"], + str(video_path), + ] + ) + assert len(descs) == 1 + assert descs[0]["error"]["type"] == "MapillaryVideoGPSNotFoundError" -def test_sample_video_without_video_time(setup_data: py.path.local): - if not IS_FFMPEG_INSTALLED: - pytest.skip("skip because ffmpeg not installed") - video_dir = setup_data.join("videos") - root_sample_dir = video_dir.join("mapillary_sampled_video_frames") +def test_process_video_geotag_source_with_gopro_gpx_specified( + setup_data: py.path.local, +): + video_path = setup_data.join("gopro_data").join("max-360mode.mp4") + gpx_file = setup_data.join("gpx").join("sf_30km_h.gpx") - for input_path in [video_dir, video_dir.join("sample-5s.mp4")]: - x = subprocess.run( - f"{EXECUTABLE} sample_video --video_sample_interval=2 --video_sample_distance=-1 --video_sample_distance=-1 --rerun {input_path}", - shell=True, - ) - assert x.returncode == 7, x.stderr - if root_sample_dir.exists(): - assert len(root_sample_dir.listdir()) == 0 + descs = run_process_for_descs( + [ + *[ + "--video_geotag_source", + json.dumps({"source": "gpx", "source_path": str(gpx_file)}), + ], + str(video_path), + ] + ) - x = subprocess.run( - f"{EXECUTABLE} sample_video --video_sample_interval=2 --video_sample_distance=-1 --skip_sample_errors --rerun {input_path}", - shell=True, - ) - assert x.returncode == 0, x.stderr - if root_sample_dir.exists(): - assert len(root_sample_dir.listdir()) == 0 + assert len(descs) == 1 + assert descs[0]["MAPDeviceMake"] == "GoPro" + 
assert descs[0]["MAPDeviceModel"] == "GoPro Max" + assert len(descs[0]["MAPGPSTrack"]) > 0 - x = subprocess.run( - f"{EXECUTABLE} sample_video --video_sample_interval=2 --video_sample_distance=-1 --video_start_time 2021_10_10_10_10_10_123 --rerun {input_path}", - shell=True, - ) - assert x.returncode == 0, x.stderr - assert len(root_sample_dir.listdir()) == 1 - samples = root_sample_dir.join("sample-5s.mp4").listdir() - samples.sort() - times = [] - for s in samples: - with s.open("rb") as fp: - tags = exifread.process_file(fp) - times.append(tags["EXIF DateTimeOriginal"].values) - assert ( - "2021:10:10 10:10:10", - "2021:10:10 10:10:12", - "2021:10:10 10:10:14", - ) == tuple(times) - - -def test_video_process(setup_data: py.path.local): - if not IS_FFMPEG_INSTALLED: - pytest.skip("skip because ffmpeg not installed") - - video_dir = setup_data.join("videos") + +def test_process_geotag_with_gpx_pattern_not_found(setup_data: py.path.local): + video_path = setup_data.join("gopro_data").join("max-360mode.mp4") + + descs = run_process_for_descs( + [ + *["--video_geotag_source", "gpx"], + str(video_path), + ] + ) + + assert len(descs) == 1 + assert descs[0]["error"]["type"] == "MapillaryVideoGPSNotFoundError" + + +def test_process_geotag_with_gpx_pattern(setup_data: py.path.local): + video_path = setup_data.join("gopro_data").join("max-360mode.mp4") gpx_file = setup_data.join("gpx").join("sf_30km_h.gpx") - gpx_start_time = "2025_03_14_07_00_00_000" - gpx_end_time = "2025_03_14_07_01_33_624" - video_start_time = "2025_03_14_07_00_00_000" - desc_path = video_dir.join("my_samples").join("mapillary_image_description.json") - x = subprocess.run( - f"""{EXECUTABLE} --verbose video_process \ - {PROCESS_FLAGS} \ - --video_sample_interval=2 \ - --video_sample_distance=-1 \ - --skip_process_errors \ - --video_start_time {video_start_time} \ - --geotag_source gpx \ - --geotag_source_path {gpx_file} {video_dir} {video_dir.join("my_samples")} -""", - shell=True, - ) - assert x.returncode == 0, x.stderr - with open(desc_path) as fp: - descs = json.load(fp) - assert 0 == len(find_desc_errors(descs)) - assert 3 == len(filter_out_errors(descs)) - - -def test_video_process_sample_with_multiple_distances(setup_data: py.path.local): - if not IS_FFMPEG_INSTALLED: - pytest.skip("skip because ffmpeg not installed") + gpx_file.copy(setup_data.join("gopro_data").join("max-360mode.gpx")) - video_dir = setup_data.join("gopro_data") - desc_path = video_dir.join("my_samples").join("mapillary_image_description.json") - for distance in [0, 2.4, 100]: - x = subprocess.run( - f"{EXECUTABLE} --verbose video_process --video_sample_distance={distance} --rerun {PROCESS_FLAGS} {video_dir} {video_dir.join('my_samples')}", - shell=True, - ) - assert x.returncode == 0, x.stderr - with open(desc_path) as fp: - descs = json.load(fp) - if distance == 100: - assert 1 == len(descs) - else: - assert len(descs) > 1 + descs = run_process_for_descs( + [ + *["--video_geotag_source", "gpx"], + str(video_path), + ] + ) + assert len(descs) == 1 + assert descs[0]["MAPDeviceMake"] == "GoPro" + assert descs[0]["MAPDeviceModel"] == "GoPro Max" + assert len(descs[0]["MAPGPSTrack"]) > 0 -def test_video_process_sample_with_distance(setup_data: py.path.local): - if not IS_FFMPEG_INSTALLED: - pytest.skip("skip because ffmpeg not installed") - video_dir = setup_data.join("gopro_data") - sample_dir = Path(setup_data, "gopro_data", "my_samples") - desc_path = Path(sample_dir, "mapillary_image_description.json") - for option in [ - "--video_sample_distance=6", 
- "--video_sample_distance=6 --video_sample_interval=-2", - ]: - x = subprocess.run( - f"{EXECUTABLE} --verbose video_process {option} {PROCESS_FLAGS} {video_dir} {video_dir.join('my_samples')}", - shell=True, - ) - assert x.returncode == 0, x.stderr - assert_contains_image_descs( - desc_path, - [ - { - "filename": str( - Path( - sample_dir, - "max-360mode.mp4", - "max-360mode_0_000001.jpg", - ) - ), - "filetype": "image", - "MAPLatitude": 33.1266719, - "MAPLongitude": -117.3273063, - "MAPCaptureTime": "2019_11_18_15_44_47_862", - "MAPAltitude": -22.18, - "MAPCompassHeading": { - "TrueHeading": 313.68, - "MagneticHeading": 313.68, - }, - "MAPSequenceUUID": "0", - "MAPDeviceMake": "GoPro", - "MAPDeviceModel": "GoPro Max", - "MAPOrientation": 1, - }, - { - "filename": str( - Path( - sample_dir, - "max-360mode.mp4", - "max-360mode_0_000002.jpg", - ) - ), - "filetype": "image", - "MAPLatitude": 33.1267206, - "MAPLongitude": -117.3273345, - "MAPCaptureTime": "2019_11_18_15_44_53_159", - "MAPAltitude": -21.91, - "MAPCompassHeading": { - "TrueHeading": 330.82, - "MagneticHeading": 330.82, - }, - "MAPSequenceUUID": "0", - "MAPDeviceMake": "GoPro", - "MAPDeviceModel": "GoPro Max", - "MAPOrientation": 1, - }, - { - "filename": str( - Path( - sample_dir, - "max-360mode.mp4", - "max-360mode_0_000003.jpg", - ) - ), - "filetype": "image", - "MAPLatitude": 33.1267702, - "MAPLongitude": -117.3273612, - "MAPCaptureTime": "2019_11_18_15_44_58_289", - "MAPAltitude": -22.58, - "MAPCompassHeading": { - "TrueHeading": 10.54, - "MagneticHeading": 10.54, - }, - "MAPSequenceUUID": "0", - "MAPDeviceMake": "GoPro", - "MAPDeviceModel": "GoPro Max", - "MAPOrientation": 1, - }, +def test_process_video_geotag_source_with_exiftool_runtime(setup_data: py.path.local): + pytest_skip_if_not_exiftool_installed() + + video_path = setup_data.join("gopro_data").join("max-360mode.mp4") + + exiftool_descs = run_process_for_descs( + [ + *["--video_geotag_source", json.dumps({"source": "exiftool"})], + str(video_path), + ] + ) + + assert len(exiftool_descs) == 1 + assert exiftool_descs[0]["MAPDeviceMake"] == "GoPro" + assert exiftool_descs[0]["MAPDeviceModel"] == "GoPro Max" + assert len(exiftool_descs[0]["MAPGPSTrack"]) > 0 + + native_descs = run_process_for_descs( + [ + *["--video_geotag_source", json.dumps({"source": "native"})], + str(video_path), + ] + ) + + assert_descs_exact_equal(exiftool_descs, native_descs) + + +def test_process_geotag_everything_with_exiftool_runtime(setup_data: py.path.local): + pytest_skip_if_not_exiftool_installed() + + exiftool_descs = run_process_for_descs( + [*["--geotag_source", "exiftool_runtime"], str(setup_data)] + ) + + native_descs = run_process_for_descs( + [*["--geotag_source", "native"], str(setup_data)] + ) + + assert_descs_exact_equal(exiftool_descs, native_descs) + + +def test_process_geotag_everything_with_exiftool_not_found(setup_data: py.path.local): + pytest_skip_if_not_exiftool_installed() + + env = os.environ.copy() + env.update( + { + "MAPILLARY_TOOLS_EXIFTOOL_PATH": "exiftool_not_found", + } + ) + + exiftool_descs = run_process_for_descs( + [*["--geotag_source", "exiftool_runtime"], str(setup_data)], env=env + ) + + assert len(exiftool_descs) > 0 + for d in exiftool_descs: + assert "error" in d + assert d["error"]["type"] == "MapillaryExiftoolNotFoundError" + + +def test_process_geotag_everything_with_exiftool_not_found_overriden( + setup_data: py.path.local, +): + pytest_skip_if_not_exiftool_installed() + + env = os.environ.copy() + env.update( + { + 
"MAPILLARY_TOOLS_EXIFTOOL_PATH": "exiftool_not_found", + } + ) + + exiftool_descs = run_process_for_descs( + [ + *["--geotag_source", "exiftool_runtime"], + *["--geotag_source", "native"], + str(setup_data), + ], + env=env, + ) + + native_descs = run_process_for_descs( + [ + *["--geotag_source", "exiftool_runtime"], + *["--geotag_source", "native"], + str(setup_data), + ], + env=env, + ) + + assert_descs_exact_equal(exiftool_descs, native_descs) + + +def test_process_geotag_with_exiftool_xml(setup_data: py.path.local): + pytest_skip_if_not_exiftool_installed() + + exiftool_output_dir = run_exiftool_dir(setup_data) + + exiftool_descs = run_process_for_descs( + [ + *[ + "--geotag_source", + json.dumps( + {"source": "exiftool_xml", "source_path": str(exiftool_output_dir)} + ), ], - ) + str(setup_data), + ] + ) + native_descs = run_process_for_descs( + [*["--geotag_source", "native"], str(setup_data)] + ) -def test_video_process_multiple_videos(setup_data: py.path.local): - if not IS_FFMPEG_INSTALLED: - pytest.skip("skip because ffmpeg not installed") + assert_descs_exact_equal(exiftool_descs, native_descs) - desc_path = setup_data.join("my_samples").join("mapillary_image_description.json") - sub_folder = setup_data.join("video_sub_folder").mkdir() - video_path = setup_data.join("videos").join("sample-5s.mp4") - video_path.copy(sub_folder) - gpx_file = setup_data.join("gpx").join("sf_30km_h.gpx") - gpx_start_time = "2025_03_14_07_00_00_000" - gpx_end_time = "2025_03_14_07_01_33_624" - x = subprocess.run( - f"""{EXECUTABLE} video_process {PROCESS_FLAGS} \ - --video_sample_interval=2 \ - --video_sample_distance=-1 \ - --video_start_time={gpx_start_time} \ - --geotag_source=gpx \ - --geotag_source_path={gpx_file} \ - {video_path} {setup_data.join("my_samples")} -""", - shell=True, - ) - assert x.returncode == 0, x.stderr - with open(desc_path) as fp: - descs = json.load(fp) - for d in descs: - assert Path(d["filename"]).is_file(), d["filename"] - assert "sample-5s.mp4" in d["filename"] - assert 0 == len(find_desc_errors(descs)) - assert 3 == len(filter_out_errors(descs)) + +def test_process_geotag_with_exiftool_xml_pattern(setup_data: py.path.local): + pytest_skip_if_not_exiftool_installed() + + exiftool_output_dir = run_exiftool_dir(setup_data) + + exiftool_descs = run_process_for_descs( + [ + *[ + "--geotag_source", + json.dumps( + { + "source": "exiftool_xml", + "pattern": str(exiftool_output_dir.join("%g.xml")), + } + ), + ], + str(setup_data), + ] + ) + + native_descs = run_process_for_descs( + [*["--geotag_source", "native"], str(setup_data)] + ) + + assert_descs_exact_equal(exiftool_descs, native_descs) diff --git a/tests/integration/test_process_and_upload.py b/tests/integration/test_process_and_upload.py index c3d93c5ca..c21cb2c91 100644 --- a/tests/integration/test_process_and_upload.py +++ b/tests/integration/test_process_and_upload.py @@ -1,5 +1,4 @@ import datetime -import subprocess from pathlib import Path import py.path @@ -8,19 +7,14 @@ from .fixtures import ( assert_contains_image_descs, assert_same_image_descs, - EXECUTABLE, extract_all_uploaded_descs, - IS_FFMPEG_INSTALLED, + pytest_skip_if_not_ffmpeg_installed, + run_process_and_upload_for_descs, setup_config, setup_data, setup_upload, - USERNAME, ) -PROCESS_FLAGS = "" -UPLOAD_FLAGS = f"--dry_run --user_name={USERNAME}" - - EXPECTED_DESCS = { "image": { "DSC00001.JPG": { @@ -140,63 +134,62 @@ def test_process_and_upload(setup_data: py.path.local, setup_upload: py.path.loc setup_data.join("images"), 
setup_data.join("images").join("DSC00001.JPG"), ] - subprocess.run( - f"{EXECUTABLE} --verbose process_and_upload {UPLOAD_FLAGS} {' '.join(map(str, input_paths))} --skip_process_errors", - shell=True, - check=True, - ) - descs = sum(extract_all_uploaded_descs(Path(setup_upload)), []) + descs = run_process_and_upload_for_descs([*[str(path) for path in input_paths]]) assert_contains_image_descs( descs, [*EXPECTED_DESCS["gopro"].values(), *EXPECTED_DESCS["image"].values()], ) + uploaded_descs = sum(extract_all_uploaded_descs(Path(setup_upload)), []) + assert_contains_image_descs( + uploaded_descs, + [*EXPECTED_DESCS["gopro"].values(), *EXPECTED_DESCS["image"].values()], + ) + @pytest.mark.usefixtures("setup_config") def test_process_and_upload_images_only( - setup_data: py.path.local, - setup_upload: py.path.local, + setup_data: py.path.local, setup_upload: py.path.local ): - subprocess.run( - f"""{EXECUTABLE} --verbose process_and_upload \ - {UPLOAD_FLAGS} {PROCESS_FLAGS} \ - --filetypes=image \ - --desc_path=- \ - {setup_data}/images {setup_data}/images {setup_data}/images/DSC00001.JPG -""", - shell=True, - check=True, + descs = run_process_and_upload_for_descs( + [ + "--filetypes=image", + str(setup_data.join("images")), + str(setup_data.join("images")), + str(setup_data.join("images").join("DSC00001.JPG")), + ] ) - descs = sum(extract_all_uploaded_descs(Path(setup_upload)), []) assert_contains_image_descs(descs, [*EXPECTED_DESCS["image"].values()]) + uploaded_descs = sum(extract_all_uploaded_descs(Path(setup_upload)), []) + assert_contains_image_descs(uploaded_descs, [*EXPECTED_DESCS["image"].values()]) + @pytest.mark.usefixtures("setup_config") def test_video_process_and_upload( setup_upload: py.path.local, setup_data: py.path.local ): - if not IS_FFMPEG_INSTALLED: - pytest.skip("skip because ffmpeg not installed") + pytest_skip_if_not_ffmpeg_installed() video_dir = setup_data.join("videos") gpx_start_time = "2025_03_14_07_00_00_000" gpx_end_time = "2025_03_14_07_01_33_624" gpx_file = setup_data.join("gpx").join("sf_30km_h.gpx") - subprocess.run( - f"""{EXECUTABLE} video_process_and_upload \ - {PROCESS_FLAGS} {UPLOAD_FLAGS} \ - --video_sample_interval=2 \ - --video_sample_distance=-1 \ - --video_start_time {gpx_start_time} \ - --geotag_source gpx \ - --geotag_source_path {gpx_file} \ - --desc_path - \ - {video_dir} {video_dir.join("my_samples")} -""", - shell=True, - check=True, + + run_process_and_upload_for_descs( + [ + "--video_sample_interval=2", + "--video_sample_distance=-1", + *["--video_start_time", gpx_start_time], + *["--geotag_source", "gpx"], + *["--geotag_source_path", str(gpx_file)], + str(video_dir), + str(video_dir.join("my_samples")), + ], + command="video_process_and_upload", ) + expected = { "sample-5s_v_000001.jpg": { "filename": "sample-5s_v_000001.jpg", @@ -241,36 +234,34 @@ def test_video_process_and_upload( "filetype": "image", }, } - descs = sum(extract_all_uploaded_descs(Path(setup_upload)), []) - assert_same_image_descs(descs, list(expected.values())) + uploaded_descs = sum(extract_all_uploaded_descs(Path(setup_upload)), []) + assert_same_image_descs(uploaded_descs, list(expected.values())) @pytest.mark.usefixtures("setup_config") def test_video_process_and_upload_after_gpx( setup_upload: py.path.local, setup_data: py.path.local ): - if not IS_FFMPEG_INSTALLED: - pytest.skip("skip because ffmpeg not installed") + pytest_skip_if_not_ffmpeg_installed() video_dir = setup_data.join("videos") gpx_start_time = "2025_03_14_07_00_00_000" gpx_end_time = 
"2025_03_14_07_01_33_624" video_start_time = "2025_03_14_07_01_34_624" gpx_file = setup_data.join("gpx").join("sf_30km_h.gpx") - subprocess.run( - f"""{EXECUTABLE} video_process_and_upload \ - {PROCESS_FLAGS} {UPLOAD_FLAGS} \ - --video_sample_interval=2 \ - --video_sample_distance=-1 \ - --video_start_time {video_start_time} \ - --geotag_source gpx \ - --geotag_source_path {gpx_file} \ - --skip_process_errors \ - --desc_path - \ - {video_dir} {video_dir.join("my_samples")} -""", - shell=True, - check=True, + + run_process_and_upload_for_descs( + [ + "--video_sample_interval=2", + "--video_sample_distance=-1", + *["--video_start_time", video_start_time], + *["--geotag_source", "gpx"], + *["--geotag_source_path", str(gpx_file)], + str(video_dir), + str(video_dir.join("my_samples")), + ], + command="video_process_and_upload", ) - descs = sum(extract_all_uploaded_descs(Path(setup_upload)), []) - assert_same_image_descs(descs, []) + + uploaded_descs = sum(extract_all_uploaded_descs(Path(setup_upload)), []) + assert_same_image_descs(uploaded_descs, []) diff --git a/tests/integration/test_upload.py b/tests/integration/test_upload.py index 31a33d4c1..8bc0600be 100644 --- a/tests/integration/test_upload.py +++ b/tests/integration/test_upload.py @@ -7,86 +7,70 @@ from .fixtures import ( assert_contains_image_descs, - EXECUTABLE, extract_all_uploaded_descs, + run_process_and_upload_for_descs, + run_process_for_descs, + run_upload, setup_config, setup_data, setup_upload, - USERNAME, ) -PROCESS_FLAGS = "" -UPLOAD_FLAGS = f"--dry_run --user_name={USERNAME}" +@pytest.mark.usefixtures("setup_config") +def test_upload_everything(setup_data: py.path.local, setup_upload: py.path.local): + descs = run_process_and_upload_for_descs([str(setup_data)]) + + uploaded_descs: list[dict] = sum(extract_all_uploaded_descs(Path(setup_upload)), []) + assert len(uploaded_descs) > 0, "No data were uploaded" + assert len([d for d in uploaded_descs if d["filetype"] == "camm"]) > 0 + assert len([d for d in uploaded_descs if d["filetype"] == "error"]) == 0 + assert len([d for d in uploaded_descs if d["filetype"] == "image"]) > 0 + + # TODO: check video descs + assert_contains_image_descs(descs, uploaded_descs) @pytest.mark.usefixtures("setup_config") def test_upload_image_dir(setup_data: py.path.local, setup_upload: py.path.local): - subprocess.run( - f"{EXECUTABLE} process {PROCESS_FLAGS} --file_types=image {setup_data}", - shell=True, - check=True, - ) - - subprocess.run( - f"{EXECUTABLE} process_and_upload {UPLOAD_FLAGS} --file_types=image {setup_data}", - shell=True, - check=True, - ) + descs = run_process_and_upload_for_descs(["--file_types=image", str(setup_data)]) uploaded_descs: list[dict] = sum(extract_all_uploaded_descs(Path(setup_upload)), []) assert len(uploaded_descs) > 0, "No images were uploaded" - assert_contains_image_descs( - Path(setup_data.join("mapillary_image_description.json")), - uploaded_descs, - ) + assert_contains_image_descs(descs, uploaded_descs) @pytest.mark.usefixtures("setup_config") def test_upload_image_dir_twice(setup_data: py.path.local, setup_upload: py.path.local): - subprocess.run( - f"{EXECUTABLE} process --skip_process_errors {PROCESS_FLAGS} {setup_data}", - shell=True, - check=True, - ) - desc_path = setup_data.join("mapillary_image_description.json") + descs = run_process_for_descs([str(setup_data)]) # first upload - subprocess.run( - f"{EXECUTABLE} process_and_upload {UPLOAD_FLAGS} --file_types=image {setup_data}", - shell=True, - check=True, - ) - first_descs = 
extract_all_uploaded_descs(Path(setup_upload)) - assert_contains_image_descs( - Path(desc_path), - sum(first_descs, []), + first_descs = run_process_and_upload_for_descs( + ["--file_types=image", str(setup_data)] ) + assert len([d for d in first_descs if d["filetype"] == "image"]) > 0 + first_uploaded_descs = extract_all_uploaded_descs(Path(setup_upload)) + assert_contains_image_descs(descs, sum(first_uploaded_descs, [])) # expect the second upload to not produce new uploads - subprocess.run( - f"{EXECUTABLE} process_and_upload {UPLOAD_FLAGS} --desc_path={desc_path} --file_types=image {setup_data} {setup_data} {setup_data}/images/DSC00001.JPG", - shell=True, - check=True, - ) - second_descs = extract_all_uploaded_descs(Path(setup_upload)) - assert_contains_image_descs( - Path(desc_path), - sum(second_descs, []), + second_descs = run_process_and_upload_for_descs( + [ + "--file_types=image", + str(setup_data), + str(setup_data), + str(setup_data.join("images/DSC00001.JPG")), + ] ) + assert len([d for d in second_descs if d["filetype"] == "image"]) > 0 + second_uploaded_descs = extract_all_uploaded_descs(Path(setup_upload)) + assert_contains_image_descs(descs, sum(second_uploaded_descs, [])) @pytest.mark.usefixtures("setup_config") def test_upload_wrong_descs(setup_data: py.path.local, setup_upload: py.path.local): - x = subprocess.run( - f"{EXECUTABLE} process --skip_process_errors {PROCESS_FLAGS} {setup_data}", - shell=True, - ) - assert x.returncode == 0, x.stderr - desc_path = setup_data.join("mapillary_image_description.json") - with open(desc_path, "r") as fp: - descs = json.load(fp) + descs = run_process_for_descs([str(setup_data)]) + descs.append( { "filename": str(setup_data.join("not_found")), @@ -97,51 +81,20 @@ def test_upload_wrong_descs(setup_data: py.path.local, setup_upload: py.path.loc "MAPCompassHeading": {"TrueHeading": 17.0, "MagneticHeading": 17.0}, }, ) - with open(desc_path, "w") as fp: - fp.write(json.dumps(descs)) - - x = subprocess.run( - f"{EXECUTABLE} upload {UPLOAD_FLAGS} --desc_path={desc_path} {setup_data} {setup_data} {setup_data}/images/DSC00001.JPG", - shell=True, - ) - assert x.returncode == 15, x.stderr - - -@pytest.mark.usefixtures("setup_config") -def test_upload_read_descs_from_stdin( - setup_data: py.path.local, setup_upload: py.path.local -): - subprocess.run( - f"{EXECUTABLE} process {PROCESS_FLAGS} --file_types=image {setup_data}", - shell=True, - check=True, - ) - - descs = [ - { - "filename": "foo.jpg", - "filetype": "image", - "MAPLatitude": 1.0, - "MAPLongitude": 2.0, - "MAPCaptureTime": "2020_01_02_11_12_13_123456", - }, - ] - descs_json = json.dumps(descs) - - process = subprocess.Popen( - f"{EXECUTABLE} process_and_upload {UPLOAD_FLAGS} --file_types=image {setup_data}", - stdin=subprocess.PIPE, - text=True, - shell=True, - ) - stdout, stderr = process.communicate(input=descs_json) - assert process.returncode == 0, stderr + desc_path = setup_data.join("mapillary_image_description.json") - uploaded_descs: list[dict] = sum(extract_all_uploaded_descs(Path(setup_upload)), []) - assert len(uploaded_descs) > 0, "No images were uploaded" + with open(desc_path, "w") as fp: + fp.write(json.dumps(descs)) - assert_contains_image_descs( - Path(setup_data.join("mapillary_image_description.json")), - uploaded_descs, - ) + with pytest.raises(subprocess.CalledProcessError) as ex: + run_upload( + [ + *["--desc_path", str(desc_path)], + str(setup_data), + str(setup_data), + str(setup_data.join("images/DSC00001.JPG")), + ] + ) + + assert ex.value.returncode == 
15, ex.value.stderr diff --git a/tests/integration/test_video_process.py b/tests/integration/test_video_process.py new file mode 100644 index 000000000..f6c5437fa --- /dev/null +++ b/tests/integration/test_video_process.py @@ -0,0 +1,268 @@ +import functools +import subprocess +import tempfile +from pathlib import Path + +import exifread + +import py.path +import pytest + +from .fixtures import ( + assert_contains_image_descs, + pytest_skip_if_not_ffmpeg_installed, + run_command, + run_process_for_descs, + setup_data, +) + + +run_video_process_for_descs = functools.partial( + run_process_for_descs, command="video_process" +) + +run_sample_video = functools.partial(run_command, command="sample_video") + + +def test_sample_video_relpath(): + pytest_skip_if_not_ffmpeg_installed() + + with tempfile.TemporaryDirectory() as dir: + run_sample_video(["--rerun", "tests/data/gopro_data/hero8.mp4", str(dir)]) + + with tempfile.TemporaryDirectory() as dir: + run_sample_video( + [ + "--rerun", + "--video_start_time", + "2021_10_10_10_10_10_123", + "tests/data", + str(dir), + ] + ) + + +def test_sample_video_without_video_time(setup_data: py.path.local): + pytest_skip_if_not_ffmpeg_installed() + + video_dir = setup_data.join("videos") + root_sample_dir = video_dir.join("mapillary_sampled_video_frames") + + for input_path in [video_dir, video_dir.join("sample-5s.mp4")]: + with pytest.raises(subprocess.CalledProcessError) as ex: + run_sample_video( + [ + "--video_sample_interval=2", + "--video_sample_distance=-1", + "--rerun", + input_path, + ] + ) + assert 7 == ex.value.returncode, ex.value.stderr + + if root_sample_dir.exists(): + assert len(root_sample_dir.listdir()) == 0 + + run_sample_video( + [ + "--video_sample_interval=2", + "--video_sample_distance=-1", + "--skip_sample_errors", + "--rerun", + input_path, + ] + ) + if root_sample_dir.exists(): + assert len(root_sample_dir.listdir()) == 0 + + +def test_sample_video_specify_video_start_time(setup_data: py.path.local): + pytest_skip_if_not_ffmpeg_installed() + + video_dir = setup_data.join("videos") + root_sample_dir = video_dir.join("mapillary_sampled_video_frames") + + for input_path in [video_dir, video_dir.join("sample-5s.mp4")]: + run_sample_video( + [ + "--video_sample_interval=2", + "--video_sample_distance=-1", + "--video_start_time=2021_10_10_10_10_10_123", + "--rerun", + input_path, + ] + ) + samples = root_sample_dir.join("sample-5s.mp4").listdir() + samples.sort() + times = [] + for s in samples: + with s.open("rb") as fp: + tags = exifread.process_file(fp) + times.append(tags["EXIF DateTimeOriginal"].values) + assert ( + "2021:10:10 10:10:10", + "2021:10:10 10:10:12", + "2021:10:10 10:10:14", + ) == tuple(times) + + +def test_video_process(setup_data: py.path.local): + pytest_skip_if_not_ffmpeg_installed() + + video_dir = setup_data.join("videos") + gpx_file = setup_data.join("gpx").join("sf_30km_h.gpx") + gpx_start_time = "2025_03_14_07_00_00_000" + gpx_end_time = "2025_03_14_07_01_33_624" + video_start_time = "2025_03_14_07_00_00_000" + + descs = run_video_process_for_descs( + [ + "--video_sample_interval=2", + "--video_sample_distance=-1", + *["--video_start_time", video_start_time], + "--geotag_source=gpx", + *["--geotag_source_path", str(gpx_file)], + str(video_dir), + str(video_dir.join("my_samples")), + ] + ) + assert 3 == len(descs) + assert 0 == len([d for d in descs if "error" in d]) + + +def test_video_process_sample_with_multiple_distances(setup_data: py.path.local): + pytest_skip_if_not_ffmpeg_installed() + + video_dir = 
setup_data.join("gopro_data") + for distance in [0, 2.4, 100]: + descs = run_video_process_for_descs( + [ + "--video_sample_distance", + str(distance), + "--rerun", + str(video_dir), + str(video_dir.join("my_samples")), + ] + ) + if distance == 100: + assert 1 == len(descs) + else: + assert len(descs) > 1 + + +def test_video_process_sample_with_distance(setup_data: py.path.local): + pytest_skip_if_not_ffmpeg_installed() + + video_dir = setup_data.join("gopro_data") + sample_dir = Path(setup_data, "gopro_data", "my_samples") + + for options in [ + ["--video_sample_distance=6"], + ["--video_sample_distance=6", "--video_sample_interval=-2"], + ]: + descs = run_video_process_for_descs( + [ + *options, + str(video_dir), + str(video_dir.join("my_samples")), + ] + ) + assert_contains_image_descs( + descs, + [ + { + "filename": str( + Path( + sample_dir, + "max-360mode.mp4", + "max-360mode_0_000001.jpg", + ) + ), + "filetype": "image", + "MAPLatitude": 33.1266719, + "MAPLongitude": -117.3273063, + "MAPCaptureTime": "2019_11_18_15_44_47_862", + "MAPAltitude": -22.18, + "MAPCompassHeading": { + "TrueHeading": 313.68, + "MagneticHeading": 313.68, + }, + "MAPSequenceUUID": "0", + "MAPDeviceMake": "GoPro", + "MAPDeviceModel": "GoPro Max", + "MAPOrientation": 1, + }, + { + "filename": str( + Path( + sample_dir, + "max-360mode.mp4", + "max-360mode_0_000002.jpg", + ) + ), + "filetype": "image", + "MAPLatitude": 33.1267206, + "MAPLongitude": -117.3273345, + "MAPCaptureTime": "2019_11_18_15_44_53_159", + "MAPAltitude": -21.91, + "MAPCompassHeading": { + "TrueHeading": 330.82, + "MagneticHeading": 330.82, + }, + "MAPSequenceUUID": "0", + "MAPDeviceMake": "GoPro", + "MAPDeviceModel": "GoPro Max", + "MAPOrientation": 1, + }, + { + "filename": str( + Path( + sample_dir, + "max-360mode.mp4", + "max-360mode_0_000003.jpg", + ) + ), + "filetype": "image", + "MAPLatitude": 33.1267702, + "MAPLongitude": -117.3273612, + "MAPCaptureTime": "2019_11_18_15_44_58_289", + "MAPAltitude": -22.58, + "MAPCompassHeading": { + "TrueHeading": 10.54, + "MagneticHeading": 10.54, + }, + "MAPSequenceUUID": "0", + "MAPDeviceMake": "GoPro", + "MAPDeviceModel": "GoPro Max", + "MAPOrientation": 1, + }, + ], + ) + + +def test_video_process_multiple_videos(setup_data: py.path.local): + pytest_skip_if_not_ffmpeg_installed() + + sub_folder = setup_data.join("video_sub_folder").mkdir() + video_path = setup_data.join("videos").join("sample-5s.mp4") + video_path.copy(sub_folder) + gpx_file = setup_data.join("gpx").join("sf_30km_h.gpx") + gpx_start_time = "2025_03_14_07_00_00_000" + gpx_end_time = "2025_03_14_07_01_33_624" + descs = run_video_process_for_descs( + [ + "--video_sample_interval=2", + "--video_sample_distance=-1", + *["--video_start_time", gpx_start_time], + "--geotag_source=gpx", + "--geotag_source_path", + str(gpx_file), + str(sub_folder), + str(setup_data.join("my_samples")), + ] + ) + for d in descs: + assert Path(d["filename"]).is_file(), d["filename"] + assert "sample-5s.mp4" in d["filename"] + assert 3 == len(descs) + assert 0 == len([d for d in descs if "error" in d]) diff --git a/tests/unit/test_blackvue_parser.py b/tests/unit/test_blackvue_parser.py index 6cb1eb014..7d4f145fb 100644 --- a/tests/unit/test_blackvue_parser.py +++ b/tests/unit/test_blackvue_parser.py @@ -39,29 +39,12 @@ def test_parse_points(): box = {"type": b"free", "data": [{"type": b"gps ", "data": gps_data}]} data = cparser.Box32ConstructBuilder({b"free": {}}).Box.build(box) info = blackvue_parser.extract_blackvue_info(io.BytesIO(data)) - assert info is 
not None - assert [ - geo.Point( - time=0.0, lat=38.8861575, lon=-76.99239516666667, alt=10.2, angle=None - ), - geo.Point( - time=0.002, - lat=38.8861575, - lon=-76.99239516666667, - alt=None, - angle=None, - ), - geo.Point( - time=0.003, - lat=38.88615816666667, - lon=-76.992434, - alt=None, - angle=None, - ), - geo.Point( - time=0.968, lat=38.88615816666667, lon=-76.992434, alt=7.7, angle=None - ), - geo.Point( - time=1.005, lat=38.88615816666667, lon=-76.992434, alt=7.7, angle=None - ), - ] == list(info.gps or []) + assert info == blackvue_parser.BlackVueInfo( + gps=[ + geo.Point( + time=0.0, lat=38.88615816666667, lon=-76.992434, alt=None, angle=None + ) + ], + make="BlackVue", + model="", + ) diff --git a/tests/unit/test_ffmpeg.py b/tests/unit/test_ffmpeg.py index 3079e7eac..6c0b016e5 100644 --- a/tests/unit/test_ffmpeg.py +++ b/tests/unit/test_ffmpeg.py @@ -8,12 +8,12 @@ from mapillary_tools import ffmpeg -from ..integration.fixtures import IS_FFMPEG_INSTALLED, setup_data +from ..integration.fixtures import pytest_skip_if_not_ffmpeg_installed, setup_data def test_ffmpeg_run_ok(): - if not IS_FFMPEG_INSTALLED: - pytest.skip("ffmpeg not installed") + pytest_skip_if_not_ffmpeg_installed() + ff = ffmpeg.FFMPEG() ff.run_ffmpeg_non_interactive(["-version"]) @@ -23,15 +23,14 @@ def test_ffmpeg_run_ok(): raises=ffmpeg.FFmpegCalledProcessError, ) def test_ffmpeg_run_raise(): - if not IS_FFMPEG_INSTALLED: - pytest.skip("ffmpeg not installed") + pytest_skip_if_not_ffmpeg_installed() + ff = ffmpeg.FFMPEG() ff.run_ffmpeg_non_interactive(["foo"]) def test_ffmpeg_extract_frames_ok(setup_data: py.path.local): - if not IS_FFMPEG_INSTALLED: - pytest.skip("ffmpeg not installed") + pytest_skip_if_not_ffmpeg_installed() ff = ffmpeg.FFMPEG() @@ -59,8 +58,7 @@ def test_ffmpeg_extract_frames_ok(setup_data: py.path.local): def test_ffmpeg_extract_frames_with_specifier_ok(setup_data: py.path.local): - if not IS_FFMPEG_INSTALLED: - pytest.skip("ffmpeg not installed") + pytest_skip_if_not_ffmpeg_installed() ff = ffmpeg.FFMPEG() @@ -93,8 +91,7 @@ def test_ffmpeg_extract_frames_with_specifier_ok(setup_data: py.path.local): def test_ffmpeg_extract_specified_frames_ok(setup_data: py.path.local): - if not IS_FFMPEG_INSTALLED: - pytest.skip("ffmpeg not installed") + pytest_skip_if_not_ffmpeg_installed() ff = ffmpeg.FFMPEG() @@ -115,8 +112,7 @@ def test_ffmpeg_extract_specified_frames_ok(setup_data: py.path.local): def test_ffmpeg_extract_specified_frames_empty_ok(setup_data: py.path.local): - if not IS_FFMPEG_INSTALLED: - pytest.skip("ffmpeg not installed") + pytest_skip_if_not_ffmpeg_installed() ff = ffmpeg.FFMPEG() @@ -132,8 +128,7 @@ def test_ffmpeg_extract_specified_frames_empty_ok(setup_data: py.path.local): def test_probe_format_and_streams_ok(setup_data: py.path.local): - if not IS_FFMPEG_INSTALLED: - pytest.skip("ffmpeg not installed") + pytest_skip_if_not_ffmpeg_installed() video_path = Path(setup_data.join("videos/sample-5s.mp4")) @@ -150,8 +145,7 @@ def test_probe_format_and_streams_ok(setup_data: py.path.local): def test_probe_format_and_streams_gopro_ok(setup_data: py.path.local): - if not IS_FFMPEG_INSTALLED: - pytest.skip("ffmpeg not installed") + pytest_skip_if_not_ffmpeg_installed() video_path = Path(setup_data.join("gopro_data/hero8.mp4")) @@ -169,8 +163,7 @@ def test_probe_format_and_streams_gopro_ok(setup_data: py.path.local): def test_ffmpeg_not_exists(): - if not IS_FFMPEG_INSTALLED: - pytest.skip("ffmpeg not installed") + pytest_skip_if_not_ffmpeg_installed() ff = ffmpeg.FFMPEG() try: @@ 
-194,8 +187,7 @@ def test_ffmpeg_not_exists(): def test_ffprobe_not_exists(): - if not IS_FFMPEG_INSTALLED: - pytest.skip("ffmpeg not installed") + pytest_skip_if_not_ffmpeg_installed() ff = ffmpeg.FFMPEG() try: