From d8529a55ee4e0e7af3d92283ab6d8666ea644a64 Mon Sep 17 00:00:00 2001 From: Mia Date: Wed, 9 Apr 2025 22:27:57 +0200 Subject: [PATCH 1/3] Add Bluesky to supported socials --- data/examples/europython/speakers.json | 1 + data/examples/pretalx/speakers.json | 15 +++++++++++ src/misc.py | 3 ++- src/models/europython.py | 36 ++++++++++++++++++++++++++ tests/test_social_media_extractions.py | 34 ++++++++++++++++++++++++ 5 files changed, 88 insertions(+), 1 deletion(-) diff --git a/data/examples/europython/speakers.json b/data/examples/europython/speakers.json index 178299a..82cb15e 100644 --- a/data/examples/europython/speakers.json +++ b/data/examples/europython/speakers.json @@ -10,6 +10,7 @@ "homepage": null, "gitx": "https://github.com/F3DC8A", "linkedin_url": "https://www.linkedin.com/in/F3DC8A", + "bluesky_url": "https://bsky.app/profile/username.bsky.social", "mastodon_url": null, "twitter_url": null, "website_url": "https://ep2024.europython.eu/speaker/a-speaker" diff --git a/data/examples/pretalx/speakers.json b/data/examples/pretalx/speakers.json index 7c961a0..a408fec 100644 --- a/data/examples/pretalx/speakers.json +++ b/data/examples/pretalx/speakers.json @@ -89,6 +89,21 @@ "review": null, "person": "F3DC8A", "options": [] + }, + { + "id": 272249, + "question": { + "id": 3416, + "question": { + "en": "Social (Bluesky)" + } + }, + "answer": "username", + "answer_file": null, + "submission": null, + "review": null, + "person": "F3DC8A", + "options": [] }, { "id": 272249, diff --git a/src/misc.py b/src/misc.py index 51b74e0..276cd4e 100644 --- a/src/misc.py +++ b/src/misc.py @@ -5,8 +5,9 @@ class SpeakerQuestion: affiliation = "Company/Organization/Educational Institution" homepage = "Social (Homepage)" twitter = "Social (X/Twitter)" - mastodon = "Social (Mastodon)" + bluesky = "Social (Bluesky)" linkedin = "Social (LinkedIn)" + mastodon = "Social (Mastodon)" gitx = "Social (GitHub/GitLab)" diff --git a/src/models/europython.py b/src/models/europython.py index 88360c4..7f6eae9 100644 --- a/src/models/europython.py +++ b/src/models/europython.py @@ -28,6 +28,7 @@ class EuroPythonSpeaker(BaseModel): twitter_url: str | None = None mastodon_url: str | None = None linkedin_url: str | None = None + bluesky_url: str | None = None gitx: str | None = None @computed_field @@ -58,6 +59,11 @@ def extract_answers(cls, values) -> dict: answer.answer_text.strip().split()[0] ) + if answer.question_text == SpeakerQuestion.bluesky: + values["bluesky_url"] = cls.extract_bluesky_url( + answer.answer_text.strip().split()[0] + ) + if answer.question_text == SpeakerQuestion.linkedin: values["linkedin_url"] = cls.extract_linkedin_url( answer.answer_text.strip().split()[0] @@ -114,6 +120,36 @@ def extract_linkedin_url(text: str) -> str: return linkedin_url.split("?")[0] + @staticmethod + def extract_bluesky_url(text: str) -> str: + """ + Returns a normalized BlueSky URL in the form https://bsky.app/profile/.bsky.social, + or uses the entire domain if it's custom (e.g., .dev). + """ + text = text.split("?", 1)[0].strip() + + if text.startswith("https://"): + text = text[8:] + elif text.startswith("http://"): + text = text[7:] + + if text.startswith("www."): + text = text[4:] + + for marker in ("bsky.app/profile/", "bsky/"): + if marker in text: + text = text.split(marker, 1)[1] + break + # case custom domain + else: + text = text.rsplit("/", 1)[-1] + + # if there's no dot, assume it's a non-custom handle and append '.bsky.social' + if "." not in text: + text += ".bsky.social" + + return f"https://bsky.app/profile/{text}" + class EuroPythonSession(BaseModel): """ diff --git a/tests/test_social_media_extractions.py b/tests/test_social_media_extractions.py index 20b1c74..0c9f515 100644 --- a/tests/test_social_media_extractions.py +++ b/tests/test_social_media_extractions.py @@ -32,3 +32,37 @@ def test_extract_mastodon_url(input_string: str, result: str) -> None: ) def test_extract_linkedin_url(input_string: str, result: str) -> None: assert EuroPythonSpeaker.extract_linkedin_url(input_string) == result + + +@pytest.mark.parametrize( + ("input_string", "result"), + [ + ("username", "https://bsky.app/profile/username.bsky.social"), + ("username.dev", "https://bsky.app/profile/username.dev"), + ("username.bsky.social", "https://bsky.app/profile/username.bsky.social"), + ("bsky.app/profile/username", "https://bsky.app/profile/username.bsky.social"), + ("bsky/username", "https://bsky.app/profile/username.bsky.social"), + ( + "www.bsky.app/profile/username", + "https://bsky.app/profile/username.bsky.social", + ), + ( + "www.bsky.app/profile/username.bsky.social", + "https://bsky.app/profile/username.bsky.social", + ), + ( + "http://bsky.app/profile/username", + "https://bsky.app/profile/username.bsky.social", + ), + ( + "https://bsky.app/profile/username.com", + "https://bsky.app/profile/username.com", + ), + ( + "https://bsky.app/profile/username.bsky.social", + "https://bsky.app/profile/username.bsky.social", + ), + ], +) +def test_extract_bluesky_url(input_string: str, result: str) -> None: + assert EuroPythonSpeaker.extract_bluesky_url(input_string) == result From 66d1b4182f95be2f4ce1e8b02c0a54eab4dd2386 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 9 Apr 2025 20:29:49 +0000 Subject: [PATCH 2/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci From 3318f88c69750af3ef7b5a038ae5b55b357ee598 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 13 Apr 2025 14:46:40 +0000 Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/download.py | 2 +- src/transform.py | 18 +++++++++++++----- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/download.py b/src/download.py index 81dc553..68f816f 100644 --- a/src/download.py +++ b/src/download.py @@ -61,7 +61,7 @@ pbar.close() - # Save the data to a file + # Save the data to a file filename = f"{resource_name}_latest.json" filepath = Config.raw_path / filename diff --git a/src/transform.py b/src/transform.py index 36c918e..2f80179 100644 --- a/src/transform.py +++ b/src/transform.py @@ -7,7 +7,9 @@ from src.utils.utils import Utils if __name__ == "__main__": - parser = ArgumentParser(description="Transform data from Pretalx to EuroPython format and save it.") + parser = ArgumentParser( + description="Transform data from Pretalx to EuroPython format and save it." + ) parser.add_argument( "-w", "--warn-dupes", @@ -24,8 +26,9 @@ args = parser.parse_args() exclude = set(args.exclude or []) - - print(f"Parsing submissions from {Config.raw_path}/submissions_latest.json...", end="") + print( + f"Parsing submissions from {Config.raw_path}/submissions_latest.json...", end="" + ) pretalx_submissions = Parse.publishable_submissions( Config.raw_path / "submissions_latest.json" ) @@ -38,7 +41,10 @@ print(" done.") if "youtube" not in exclude: - print(f"Parsing YouTube data from {Config.raw_path}/youtube_latest.json...", end="") + print( + f"Parsing YouTube data from {Config.raw_path}/youtube_latest.json...", + end="", + ) youtube_data = Parse.youtube(Config.raw_path / "youtube_latest.json") print(" done.") else: @@ -77,7 +83,9 @@ print(" done.") if "schedule" not in exclude: - print("\nParsing schedule from {Config.raw_path}/schedule_latest.json...", end="") + print( + "\nParsing schedule from {Config.raw_path}/schedule_latest.json...", end="" + ) pretalx_schedule = Parse.schedule(Config.raw_path / "schedule_latest.json") print(" done.")