diff --git a/src/models/europython.py b/src/models/europython.py index 7f6eae9..ce2c5d2 100644 --- a/src/models/europython.py +++ b/src/models/europython.py @@ -91,18 +91,28 @@ def extract_twitter_url(text: str) -> str: return twitter_url.split("?")[0] @staticmethod - def extract_mastodon_url(text: str) -> str: + def extract_mastodon_url(text: str) -> None | str: """ - Extract the Mastodon URL from the answer, handle @username@instance format + Normalize Mastodon handle or URL to the format: https://<instance>/@<username> """ - if not text.startswith(("https://", "http://")) and text.count("@") == 2: - mastodon_url = f"https://{text.split('@')[2]}/@{text.split('@')[1]}" - else: - mastodon_url = ( - f"https://{text.removeprefix('https://').removeprefix('http://')}" - ) + text = text.strip().split("?", 1)[0] + + # Handle @username@instance or username@instance formats + if "@" in text and not text.startswith("http"): + parts = text.split("@") + if len(parts) == 3: # @username@instance + _, username, instance = parts + elif len(parts) == 2: # username@instance + username, instance = parts + else: + return None + return f"https://{instance}/@{username}" - return mastodon_url.split("?")[0] + # Handle full URLs + if text.startswith("http://"): + text = "https://" + text[len("http://") :] + + return text @staticmethod def extract_linkedin_url(text: str) -> str: @@ -126,7 +136,7 @@ def extract_bluesky_url(text: str) -> str: Returns a normalized BlueSky URL in the form https://bsky.app/profile/<username>.bsky.social, or uses the entire domain if it's custom (e.g., <username>.dev). 
""" - text = text.split("?", 1)[0].strip() + text = text.strip().split("?", 1)[0] if text.startswith("https://"): text = text[8:] @@ -136,6 +146,10 @@ def extract_bluesky_url(text: str) -> str: if text.startswith("www."): text = text[4:] + # Remove @ if present + if text.startswith("@"): + text = text[1:] + for marker in ("bsky.app/profile/", "bsky/"): if marker in text: text = text.split(marker, 1)[1] diff --git a/tests/test_social_media_extractions.py b/tests/test_social_media_extractions.py index 0c9f515..5c13a50 100644 --- a/tests/test_social_media_extractions.py +++ b/tests/test_social_media_extractions.py @@ -13,6 +13,7 @@ "https://mastodon.social/@username", ), ("@username@mastodon.social", "https://mastodon.social/@username"), + ("username@mastodon.social", "https://mastodon.social/@username"), ], ) def test_extract_mastodon_url(input_string: str, result: str) -> None: @@ -38,7 +39,9 @@ def test_extract_linkedin_url(input_string: str, result: str) -> None: ("input_string", "result"), [ ("username", "https://bsky.app/profile/username.bsky.social"), + ("@username", "https://bsky.app/profile/username.bsky.social"), ("username.dev", "https://bsky.app/profile/username.dev"), + ("@username.dev", "https://bsky.app/profile/username.dev"), ("username.bsky.social", "https://bsky.app/profile/username.bsky.social"), ("bsky.app/profile/username", "https://bsky.app/profile/username.bsky.social"), ("bsky/username", "https://bsky.app/profile/username.bsky.social"),