Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion src/fishaudio/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from ._version import __version__
from .client import AsyncFishAudio, FishAudio
from .core.iterators import AsyncAudioStream, AudioStream
from .core.websocket_options import WebSocketOptions
from .exceptions import (
APIError,
AuthenticationError,
Expand All @@ -41,7 +42,7 @@
ValidationError,
WebSocketError,
)
from .types import FlushEvent, TextEvent
from .types import FlushEvent, ReferenceAudio, TextEvent, TTSConfig
from .utils import play, save, stream

# Main exports
Expand All @@ -56,8 +57,12 @@
# Audio streams
"AudioStream",
"AsyncAudioStream",
# Configuration
"TTSConfig",
"WebSocketOptions",
# Types
"FlushEvent",
"ReferenceAudio",
"TextEvent",
# Exceptions
"APIError",
Expand Down
2 changes: 2 additions & 0 deletions src/fishaudio/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@
from .client_wrapper import AsyncClientWrapper, ClientWrapper
from .omit import OMIT
from .request_options import RequestOptions
from .websocket_options import WebSocketOptions

# Names exported by `from <this package> import *`; each one is imported above.
__all__ = [
"AsyncClientWrapper",
"ClientWrapper",
"OMIT",
"RequestOptions",
"WebSocketOptions",
]
58 changes: 58 additions & 0 deletions src/fishaudio/core/websocket_options.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
"""WebSocket-level options for WebSocket connections."""

from typing import Any, Dict, Optional


class WebSocketOptions:
    """
    Options for configuring WebSocket connections.

    These options are passed directly to httpx_ws's connect_ws/aconnect_ws functions.
    For complete documentation, see https://frankie567.github.io/httpx-ws/reference/httpx_ws/

    Every option defaults to None, which means "not set": unset options are left
    out of the dict built by to_httpx_ws_kwargs(), so httpx_ws falls back to its
    own default for them. The defaults quoted below are the httpx_ws ones.

    Attributes:
        keepalive_ping_timeout_seconds: Maximum delay the client will wait for an answer
            to its Ping event. If the delay is exceeded, WebSocketNetworkError will be
            raised and the connection closed. Default: 20 seconds.
        keepalive_ping_interval_seconds: Interval at which the client will automatically
            send a Ping event to keep the connection alive. Default: 20 seconds.
            Leaving this as None does NOT disable keepalive pings: None is never
            forwarded, so the httpx_ws default interval stays in effect.
        max_message_size_bytes: Maximum message size in bytes to receive from the server.
            Default: 65536 bytes (64 KiB).
        queue_size: Size of the queue where received messages will be held until they
            are consumed. If the queue is full, the client will stop receiving messages
            from the server until the queue has room available. Default: 512.

    Note:
        Parameter descriptions adapted from httpx_ws documentation.
    """

    # Option names in the order they are emitted by to_httpx_ws_kwargs().
    # Each name is both an attribute of this class and the keyword forwarded
    # to httpx_ws.
    _OPTION_NAMES = (
        "keepalive_ping_timeout_seconds",
        "keepalive_ping_interval_seconds",
        "max_message_size_bytes",
        "queue_size",
    )

    def __init__(
        self,
        *,
        keepalive_ping_timeout_seconds: Optional[float] = None,
        keepalive_ping_interval_seconds: Optional[float] = None,
        max_message_size_bytes: Optional[int] = None,
        queue_size: Optional[int] = None,
    ):
        """Store the given options; None leaves an option unset."""
        self.keepalive_ping_timeout_seconds = keepalive_ping_timeout_seconds
        self.keepalive_ping_interval_seconds = keepalive_ping_interval_seconds
        self.max_message_size_bytes = max_message_size_bytes
        self.queue_size = queue_size

    def to_httpx_ws_kwargs(self) -> Dict[str, Any]:
        """
        Convert to kwargs dict for httpx_ws aconnect_ws/connect_ws.

        Returns:
            A new dict holding only the options whose value is not None,
            keyed by option name. Falsy-but-set values such as 0 are kept.
        """
        kwargs: Dict[str, Any] = {}
        for name in self._OPTION_NAMES:
            value = getattr(self, name)
            # Compare against None explicitly so that 0 / 0.0 are still forwarded.
            if value is not None:
                kwargs[name] = value
        return kwargs

    def __repr__(self) -> str:
        """Return a constructor-style representation listing only the set options."""
        set_options = ", ".join(
            f"{name}={value!r}" for name, value in self.to_httpx_ws_kwargs().items()
        )
        return f"{type(self).__name__}({set_options})"
42 changes: 39 additions & 3 deletions src/fishaudio/resources/tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from httpx_ws import AsyncWebSocketSession, WebSocketSession, aconnect_ws, connect_ws

from .realtime import aiter_websocket_audio, iter_websocket_audio
from ..core import AsyncClientWrapper, ClientWrapper, RequestOptions
from ..core import AsyncClientWrapper, ClientWrapper, RequestOptions, WebSocketOptions
from ..core.iterators import AsyncAudioStream, AudioStream
from ..types import (
AudioFormat,
Expand Down Expand Up @@ -215,6 +215,7 @@ def stream_websocket(
config: TTSConfig = TTSConfig(),
model: Model = "s1",
max_workers: int = 10,
ws_options: Optional[WebSocketOptions] = None,
Copy link

Copilot AI Nov 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The ws_options parameter is missing from the method's docstring Args section. Please add documentation for this parameter to help users understand how to configure WebSocket connection options.

Copilot uses AI. Check for mistakes.
) -> Iterator[bytes]:
"""
Stream text and receive audio in real-time via WebSocket.
Expand All @@ -231,13 +232,16 @@ def stream_websocket(
config: TTS configuration (audio settings, voice, model parameters)
model: TTS model to use
max_workers: ThreadPoolExecutor workers for concurrent sender
ws_options: WebSocket connection options for configuring timeouts, message size limits, etc.
Useful for long-running generations that may exceed default timeout values.
See WebSocketOptions class for available parameters.

Returns:
Iterator of audio bytes

Example:
```python
from fishaudio import FishAudio, TTSConfig, ReferenceAudio
from fishaudio import FishAudio, TTSConfig, ReferenceAudio, WebSocketOptions

client = FishAudio(api_key="...")

Expand Down Expand Up @@ -273,6 +277,16 @@ def text_generator():
):
f.write(audio_chunk)

# With WebSocket options for long-running generations
# Useful if you're generating very long responses that may take >20 seconds
ws_options = WebSocketOptions(keepalive_ping_timeout_seconds=60.0)
with open("output.mp3", "wb") as f:
for audio_chunk in client.tts.stream_websocket(
text_generator(),
ws_options=ws_options
):
f.write(audio_chunk)

# Parameters override config values
config = TTSConfig(format="mp3", latency="balanced")
with open("output.wav", "wb") as f:
Expand Down Expand Up @@ -305,6 +319,9 @@ def text_generator():
speed, base=config.prosody
)

# Prepare WebSocket connection kwargs
ws_kwargs = ws_options.to_httpx_ws_kwargs() if ws_options else {}

executor = ThreadPoolExecutor(max_workers=max_workers)

try:
Expand All @@ -316,6 +333,7 @@ def text_generator():
"model": model,
"Authorization": f"Bearer {self._client.api_key}",
},
**ws_kwargs,
) as ws:

def sender():
Expand Down Expand Up @@ -502,6 +520,7 @@ async def stream_websocket(
speed: Optional[float] = None,
config: TTSConfig = TTSConfig(),
model: Model = "s1",
ws_options: Optional[WebSocketOptions] = None,
Copy link

Copilot AI Nov 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The ws_options parameter is missing from the method's docstring Args section. Please add documentation for this parameter to help users understand how to configure WebSocket connection options.

Copilot uses AI. Check for mistakes.
):
"""
Stream text and receive audio in real-time via WebSocket (async).
Expand All @@ -517,13 +536,16 @@ async def stream_websocket(
speed: Speech speed multiplier, e.g. 1.5 for 1.5x speed (overrides config.prosody.speed if provided)
config: TTS configuration (audio settings, voice, model parameters)
model: TTS model to use
ws_options: WebSocket connection options for configuring timeouts, message size limits, etc.
Useful for long-running generations that may exceed default timeout values.
See WebSocketOptions class for available parameters.

Returns:
Async iterator of audio bytes

Example:
```python
from fishaudio import AsyncFishAudio, TTSConfig, ReferenceAudio
from fishaudio import AsyncFishAudio, TTSConfig, ReferenceAudio, WebSocketOptions

client = AsyncFishAudio(api_key="...")

Expand Down Expand Up @@ -559,6 +581,16 @@ async def text_generator():
):
await f.write(audio_chunk)

# With WebSocket options for long-running generations
# Useful if you're generating very long responses that may take >20 seconds
ws_options = WebSocketOptions(keepalive_ping_timeout_seconds=60.0)
async with aiofiles.open("output.mp3", "wb") as f:
async for audio_chunk in client.tts.stream_websocket(
text_generator(),
ws_options=ws_options
):
await f.write(audio_chunk)

# Parameters override config values
config = TTSConfig(format="mp3", latency="balanced")
async with aiofiles.open("output.wav", "wb") as f:
Expand Down Expand Up @@ -591,11 +623,15 @@ async def text_generator():
speed, base=config.prosody
)

# Prepare WebSocket connection kwargs
ws_kwargs = ws_options.to_httpx_ws_kwargs() if ws_options else {}

ws: AsyncWebSocketSession
async with aconnect_ws(
"/v1/tts/live",
client=self._client.client,
headers={"model": model, "Authorization": f"Bearer {self._client.api_key}"},
**ws_kwargs,
) as ws:

async def sender():
Expand Down
54 changes: 54 additions & 0 deletions tests/integration/test_tts_websocket_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import pytest

from fishaudio import WebSocketOptions
from fishaudio.types import Prosody, TTSConfig, TextEvent, FlushEvent
from .conftest import TEST_REFERENCE_ID

Expand Down Expand Up @@ -118,6 +119,59 @@ def text_stream():
with pytest.raises(WebSocketError, match="WebSocket stream ended with error"):
list(client.tts.stream_websocket(text_stream()))

def test_websocket_very_long_generation_with_timeout(self, client, save_audio):
    """
    Check that streaming a very long text completes when the keepalive timeout is raised.

    The text below may take more than 20 seconds to synthesize in full. With the
    default keepalive_ping_timeout_seconds=20 that would end in a
    WebSocketNetworkError, so the test connects with a 60 second ping timeout
    to tolerate longer generation times without a disconnect.

    This covers the fix for issue #47. To reproduce the original timeout, run:
        python reproduce_issue_47.py --mode=both
    """
    # Long enough input that processing takes a significant amount of time.
    sentences = (
        "This is a test of very long form text-to-speech generation. ",
        "We are testing the ability to handle extended generation times without timing out. ",
        "The default WebSocket keepalive timeout of 20 seconds can be insufficient for long responses. ",
        "By increasing the keepalive_ping_timeout_seconds to 60 seconds, we allow for longer gaps between chunks. ",
        "This is particularly important for conversational AI applications where responses can be quite lengthy. ",
        "The WebSocket connection should remain stable throughout the entire generation process. ",
        "We include enough text here to ensure the generation takes a substantial amount of time. ",
        "This helps verify that the increased timeout setting is working correctly. ",
        "The audio streaming should continue smoothly without any network errors. ",
        "Each sentence adds more content to be synthesized into speech. ",
        "The system should handle this gracefully with the custom WebSocket options. ",
        "This demonstrates the practical value of the WebSocketOptions feature. ",
        "Users can now configure timeouts based on their specific use case requirements. ",
        "Long-form content generation is now much more reliable. ",
        "The implementation passes through all necessary parameters to the underlying httpx_ws library. ",
    )

    def text_stream():
        # Feed the text to the API one sentence at a time.
        yield from sentences

    # Generous keepalive settings so a slow generation is not cut off.
    ws_options = WebSocketOptions(
        keepalive_ping_interval_seconds=30.0,
        keepalive_ping_timeout_seconds=60.0,
    )

    # Expected to complete without error thanks to the raised timeout.
    audio_stream = client.tts.stream_websocket(text_stream(), ws_options=ws_options)
    audio_chunks = list(audio_stream)

    assert len(audio_chunks) > 0, "Should receive audio chunks for very long text"

    # This much text should yield a sizeable amount of audio.
    complete_audio = b"".join(audio_chunks)
    total_bytes = len(complete_audio)
    assert total_bytes > 10000, (
        "Very long text should produce substantial audio data"
    )

    save_audio(audio_chunks, "test_websocket_very_long_with_timeout.mp3")


class TestAsyncTTSWebSocketIntegration:
"""Test async TTS WebSocket streaming with real API."""
Expand Down
40 changes: 39 additions & 1 deletion tests/unit/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,13 @@
from unittest.mock import patch
import httpx

from fishaudio.core import OMIT, ClientWrapper, AsyncClientWrapper, RequestOptions
from fishaudio.core import (
OMIT,
ClientWrapper,
AsyncClientWrapper,
RequestOptions,
WebSocketOptions,
)


class TestOMIT:
Expand Down Expand Up @@ -51,6 +57,38 @@ def test_get_timeout(self):
assert timeout.connect == 30.0


class TestWebSocketOptions:
    """Unit tests for WebSocketOptions.to_httpx_ws_kwargs()."""

    def test_to_httpx_ws_kwargs_all_options(self):
        """When every option is set, every option appears in the kwargs."""
        expected = {
            "keepalive_ping_timeout_seconds": 60.0,
            "keepalive_ping_interval_seconds": 30.0,
            "max_message_size_bytes": 131072,
            "queue_size": 1024,
        }
        options = WebSocketOptions(
            keepalive_ping_timeout_seconds=60.0,
            keepalive_ping_interval_seconds=30.0,
            max_message_size_bytes=131072,
            queue_size=1024,
        )
        assert options.to_httpx_ws_kwargs() == expected

    def test_to_httpx_ws_kwargs_partial_options(self):
        """Only the options that were set appear in the kwargs."""
        kwargs = WebSocketOptions(
            keepalive_ping_timeout_seconds=60.0
        ).to_httpx_ws_kwargs()
        assert kwargs == {"keepalive_ping_timeout_seconds": 60.0}
        # An option that was never set must be absent, not present as None.
        assert "keepalive_ping_interval_seconds" not in kwargs

    def test_to_httpx_ws_kwargs_no_options(self):
        """With nothing set, the kwargs dict is empty."""
        kwargs = WebSocketOptions().to_httpx_ws_kwargs()
        assert kwargs == {}


class TestClientWrapper:
"""Test sync ClientWrapper."""

Expand Down
Loading