arduino
diff --git a/‎pyproject.toml‎
Lines changed: 5 additions & 0 deletions b/‎pyproject.toml‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎src/arduino/app_bricks/cloud_asr/__init__.py‎
Lines changed: 8 additions & 0 deletions b/‎src/arduino/app_bricks/cloud_asr/__init__.py‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎src/arduino/app_bricks/cloud_asr/brick_config.yaml‎
Lines changed: 16 additions & 0 deletions b/‎src/arduino/app_bricks/cloud_asr/brick_config.yaml‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎src/arduino/app_bricks/cloud_asr/cloud_asr.py‎
Lines changed: 158 additions & 0 deletions b/‎src/arduino/app_bricks/cloud_asr/cloud_asr.py‎
Lines changed: 158 additions & 0 deletions
diff --git a/‎src/arduino/app_bricks/cloud_asr/providers/__init__.py‎
Lines changed: 53 additions & 0 deletions b/‎src/arduino/app_bricks/cloud_asr/providers/__init__.py‎
Lines changed: 53 additions & 0 deletions
@@ -77,6 +77,10 @@ cloud_llm = [
     "langchain-openai >=0.3.0, <0.4.0",
     "langchain-google-genai >=2.1.0, <2.2.0",
 ]
+cloud_asr = [
+    "websocket-client",
+    "google-cloud-speech>=2.27.0",
+]
 
 all = [
     "arduino_app_bricks[dbstorage_influx]",
@@ -90,6 +94,7 @@ all = [
     "arduino_app_bricks[stream]",
     "arduino_app_bricks[arduino_cloud]",
     "arduino_app_bricks[cloud_llm]",
+    "arduino_app_bricks[cloud_asr]",
 ]
 
 [project.urls]
 
@@ -0,0 +1,8 @@
+# SPDX-FileCopyrightText: Copyright (C) ARDUINO SRL (http://www.arduino.cc)
+#
+# SPDX-License-Identifier: MPL-2.0
+
+from .cloud_asr import CloudASR
+from .providers import ASREvent, CloudProvider
+
+__all__ = ["CloudASR", "ASREvent", "CloudProvider"]
@@ -0,0 +1,16 @@
+id: arduino:cloud_asr
+name: Cloud ASR
+description: |
+  Cloud ASR Brick provides a unified and flexible way to connect cloud-based Automatic Speech Recognition (ASR) services and transform spoken audio into text.
+  It enables real-time, streaming transcription from a connected microphone, leveraging leading cloud providers to deliver low-latency speech-to-text processing.
+category: audio
+mount_devices_into_container: true
+required_devices:
+  - microphone
+requires_container: false
+requires_model: false
+variables:
+  - name: API_KEY
+    description: API Key for the cloud-based Speech to Text service
+  - name: LANGUAGE
+    description: "Language code for transcription (e.g., en, it). Default: en"
@@ -0,0 +1,158 @@
+# SPDX-FileCopyrightText: Copyright (C) ARDUINO SRL (http://www.arduino.cc)
+#
+# SPDX-License-Identifier: MPL-2.0
+
+from __future__ import annotations
+
+import os
+import queue
+import threading
+from typing import Iterator, Callable, Optional
+
+import numpy as np
+
+from arduino.app_peripherals.microphone import Microphone
+from arduino.app_utils import Logger, brick
+
+from .providers import ASRProvider, CloudProvider, DEFAULT_PROVIDER, provider_factory
+
+# Module-level logger, named after this module.
+logger = Logger(__name__)
+
+# Default transcription language code; matches the "Default: en" documented
+# for the LANGUAGE variable in brick_config.yaml.
+DEFAULT_LANGUAGE = "en"
+
+
+@brick
+class CloudASR:
+    """
+    Cloud-based speech-to-text with pluggable cloud providers.
+    It captures audio from a microphone and streams it to the selected cloud ASR provider for transcription.
+    The recognized text is yielded as events in real-time.
+    """
+
+    def __init__(
+        self,
+        api_key: str = os.getenv("API_KEY", ""),
+        provider: CloudProvider = DEFAULT_PROVIDER,
+        mic: Optional[Microphone] = None,
+        language: str = os.getenv("LANGUAGE", ""),
+        stream_partial: bool = True,
+    ):
+        if mic:
+            logger.info(f"[{self.__class__.__name__}] Using provided microphone: {mic}")
+            self._mic = mic
+        else:
+            self._mic = Microphone()
+
+        self._language = language
+        self._stream_partial = stream_partial
+        self._mic_lock = threading.Lock()
+        self._provider: ASRProvider = provider_factory(
+            api_key=api_key,
+            name=provider,
+            language=self._language,
+            sample_rate=self._mic.sample_rate,
+            stream_partial=self._stream_partial,
+        )
+
+        self.handlers: list[Callable[[dict], None]] = []
+        self.handlers_lock = threading.Lock()
+
+    def start(self):
+        with self._mic_lock:
+            if not self._mic.is_recording.is_set():
+                self._mic.start()
+                logger.info(f"[{self.__class__.__name__}] Microphone started.")
+
+    def stop(self):
+        with self._mic_lock:
+            if self._mic.is_recording.is_set():
+                self._mic.stop()
+                logger.info(f"[{self.__class__.__name__}] Microphone stopped.")
+
+    def on_detect(self, handler):
+        """Register a callback to be invoked when speech is detected."""
+        with self.handlers_lock:
+            self.handlers.append(handler)
+
+    @brick.loop
+    def _detect_loop(self):
+        """Continuously listen for speech and invoke handlers."""
+        for resp in self.transcribe():
+            with self.handlers_lock:
+                for handler in self.handlers:
+                    try:
+                        handler(resp)
+                    except Exception as exc:
+                        logger.error(f"Error in speech detected handler: {exc}")
+
+    def transcribe(self) -> Iterator[dict]:
+        """Perform speech-to-text recognition.
+
+        Returns:
+            Iterator[dict]: Generator yielding {"event": ("partial_text"|"text"|"error"), "data": "<payload>"} messages.
+        """
+
+        provider = self._provider
+        messages: queue.Queue[dict] = queue.Queue()
+        stop_event = threading.Event()
+
+        def _send():
+            try:
+                for chunk in self._mic.stream():
+                    if stop_event.is_set():
+                        break
+                    if chunk is None:
+                        continue
+                    pcm_chunk_np = np.asarray(chunk, dtype=np.int16)
+                    provider.send_audio(pcm_chunk_np.tobytes())
+            except KeyboardInterrupt:
+                logger.info("Recognition interrupted by user. Exiting...")
+            except Exception as exc:
+                logger.error("Error while streaming microphone audio: %s", exc)
+                messages.put({"event": "error", "data": str(exc)})
+            finally:
+                stop_event.set()
+
+        partial_buffer = ""
+
+        def _recv():
+            nonlocal partial_buffer
+            try:
+                while not stop_event.is_set():
+                    result = provider.recv()
+                    if result is None:
+                        continue
+
+                    data = result.data
+                    if result.event == "partial_text":
+                        if self._provider.partial_mode == "replace":
+                            partial_buffer = str(data)
+                        else:
+                            partial_buffer += str(data)
+                    elif result.event == "text":
+                        data = data or partial_buffer
+                        partial_buffer = ""
+                    messages.put({"event": result.event, "data": data})
+
+            except Exception as exc:
+                logger.error("Error receiving transcription events: %s", exc)
+                messages.put({"event": "error", "data": str(exc)})
+                stop_event.set()
+
+        send_thread = threading.Thread(target=_send, daemon=True)
+        recv_thread = threading.Thread(target=_recv, daemon=True)
+        send_thread.start()
+        recv_thread.start()
+
+        try:
+            while recv_thread.is_alive() or send_thread.is_alive() or not messages.empty():
+                try:
+                    msg = messages.get(timeout=0.1)
+                    yield msg
+                except queue.Empty:
+                    continue
+        finally:
+            stop_event.set()
+            send_thread.join(timeout=1)
+            recv_thread.join(timeout=1)
+            provider.stop()
@@ -0,0 +1,53 @@
+# SPDX-FileCopyrightText: Copyright (C) ARDUINO SRL (http://www.arduino.cc)
+#
+# SPDX-License-Identifier: MPL-2.0
+
+from enum import Enum
+
+from .openai import OpenAITranscribe
+from .google import GoogleSpeech
+from .types import ASREvent, ASRProvider
+
+
+class CloudProvider(str, Enum):
+    """Identifiers of the cloud ASR providers that ``provider_factory`` can build."""
+
+    OPENAI_TRANSCRIBE = "openai-transcribe"
+    GOOGLE_SPEECH = "google-speech"
+
+
+# Provider used when the caller does not select one explicitly.
+DEFAULT_PROVIDER = CloudProvider.OPENAI_TRANSCRIBE
+
+
+def provider_factory(
+    api_key: str,
+    language: str,
+    sample_rate: int,
+    stream_partial: bool,
+    name: CloudProvider = DEFAULT_PROVIDER,
+) -> ASRProvider:
+    """Return the ASR cloud provider implementation."""
+    if name == CloudProvider.OPENAI_TRANSCRIBE:
+        return OpenAITranscribe(
+            api_key=api_key,
+            language=language,
+            sample_rate=sample_rate,
+            stream_partial=stream_partial,
+        )
+    if name == CloudProvider.GOOGLE_SPEECH:
+        return GoogleSpeech(
+            api_key=api_key,
+            language=language,
+            sample_rate=sample_rate,
+            stream_partial=stream_partial,
+        )
+    raise ValueError(f"Unsupported ASR cloud provider: {name}")
+
+
+__all__ = [
+    "ASREvent",
+    "ASRProvider",
+    "CloudProvider",
+    "DEFAULT_PROVIDER",
+    "GoogleSpeech",
+    "OpenAITranscribe",
+    "provider_factory",
+]