From 642c0f8ded4531b08d9888048de3e0d1c406f381 Mon Sep 17 00:00:00 2001
From: James Ding <jamesding365@gmail.com>
Date: Wed, 12 Nov 2025 14:51:34 -0600
Subject: [PATCH 01/16] docs: update README to highlight new API and migration
 guide

---
 README.md | 250 ++++++++++++++++++++++++++++--------------------------
 1 file changed, 131 insertions(+), 119 deletions(-)

diff --git a/README.md b/README.md
index 0923942..eddc4ec 100644
--- a/README.md
+++ b/README.md
@@ -1,205 +1,217 @@
 # Fish Audio Python SDK
 
-To provide convenient Python program integration for https://docs.fish.audio.
+[![PyPI version](https://badge.fury.io/py/fish-audio-sdk.svg)](https://badge.fury.io/py/fish-audio-sdk)
+[![PyPI - Downloads](https://img.shields.io/pypi/dm/fish-audio-sdk)](https://pypi.org/project/fish-audio-sdk/)
+[![Build Status](https://img.shields.io/github/actions/workflow/status/fishaudio/fish-audio-python/ci.yml?branch=main)](https://github.com/fishaudio/fish-audio-python/actions)
+[![codecov](https://codecov.io/gh/fishaudio/fish-audio-python/branch/main/graph/badge.svg)](https://codecov.io/gh/fishaudio/fish-audio-python)
+[![Python Version](https://img.shields.io/pypi/pyversions/fish-audio-sdk)](https://pypi.org/project/fish-audio-sdk/)
+[![License](https://img.shields.io/github/license/fishaudio/fish-audio-python)](https://github.com/fishaudio/fish-audio-python/blob/main/LICENSE)
 
-## Install
+The official Python library for the Fish Audio API - AI-powered text-to-speech, voice cloning, and speech recognition.
 
-```bash
-pip install fish-audio-sdk
-```
-> [!NOTE]
-> The new release has not officially been released yet - please see legacy SDK documentation for now.
+[Documentation](https://docs.fish.audio) | [API Reference](https://docs.fish.audio) | [Examples](./examples/) | [Discord](https://fish.audio)
 
-## Usage
+---
 
-### New SDK (Recommended)
+## Important: New API Available
 
-The new SDK uses the `fishaudio` module:
+> **We've released a major update to the Fish Audio Python SDK!**
+>
+> The new API (`fishaudio` module) offers improved ergonomics, better type safety, and enhanced features. The legacy SDK (`fish_audio_sdk` module) continues to be supported for existing projects, but we recommend using the new API for all new development.
+>
+> **Migration:** Both APIs are available in the same package. You can migrate at your own pace. See our [Migration Guide](https://docs.fish.audio) for details.
 
-```python
-from fishaudio import FishAudio
+---
 
-client = FishAudio(api_key="your_api_key")
+## Quick Start
+
+### Installation
+
+```bash
+pip install fish-audio-sdk
 ```
 
-You can customize the base URL:
+### Basic Usage
 
 ```python
 from fishaudio import FishAudio
+from fishaudio.utils import save
 
-client = FishAudio(api_key="your_api_key", base_url="https://your-proxy-domain")
+# Set your API key via environment variable: export FISH_AUDIO_API_KEY="your-api-key"
+# Or pass it directly: FishAudio(api_key="your-api-key")
+client = FishAudio()
+
+# Convert text to speech
+audio = client.tts.convert(text="Hello from Fish Audio!")
+save(audio, "output.mp3")
 ```
 
-### Legacy SDK
+[Get your API key](https://fish.audio) | [Full Getting Started Guide](https://docs.fish.audio)
 
-The legacy SDK uses the `fish_audio_sdk` module. Initialize a `Session` to use APIs. All APIs have synchronous and asynchronous versions. If you want to use the asynchronous version of the API, you only need to rewrite the original `session.api_call(...)` to `session.api_call.awaitable(...)`.
+---
 
-```python
-from fish_audio_sdk import Session
+## Key Features
 
-session = Session("your_api_key")
-```
+- **Text-to-Speech** - Natural-sounding voice synthesis with multiple voice options
+- **Voice Cloning** - Create custom voices using reference audio samples
+- **Real-time Streaming** - Low-latency audio generation via WebSocket connections
+- **Speech-to-Text (ASR)** - Accurate automatic speech recognition with language detection
+- **Voice Management** - Create, update, and organize custom voice models
+- **Sync and Async APIs** - Full support for both synchronous and asynchronous operations
+- **Type Safety** - Complete type hints with Pydantic models throughout
 
-Sometimes, you may need to change our endpoint to another address. You can use
+---
+
+## Examples
+
+### Text-to-Speech
 
 ```python
-from fish_audio_sdk import Session
+from fishaudio import FishAudio
+from fishaudio.utils import save
 
-session = Session("your_api_key", base_url="https://your-proxy-domain")
+client = FishAudio()
+audio = client.tts.convert(text="Hello, world!")
+save(audio, "output.mp3")
 ```
 
-### Text to speech
+### Voice Cloning with Reference Audio
 
 ```python
-from fish_audio_sdk import Session, TTSRequest
+from fishaudio import FishAudio
 
-session = Session("your_api_key")
+client = FishAudio()
 
-with open("r.mp3", "wb") as f:
-    for chunk in session.tts(TTSRequest(text="Hello, world!")):
-        f.write(chunk)
+# Use a reference voice for cloning
+with open("reference.wav", "rb") as f:
+    audio = client.tts.convert(
+        text="This will sound like the reference voice!",
+        reference_audio=f.read(),
+        reference_text="Transcription of the reference audio"
+    )
 ```
 
-Or use async version:
+### Real-time Streaming
 
 ```python
-import asyncio
-import aiofiles
-
-from fish_audio_sdk import Session, TTSRequest
-
-session = Session("your_api_key")
+from fishaudio import FishAudio
+from fishaudio.utils import play
 
+client = FishAudio()
 
-async def main():
-    async with aiofiles.open("r.mp3", "wb") as f:
-        async for chunk in session.tts.awaitable(
-            TTSRequest(text="Hello, world!"),
-        ):
-            await f.write(chunk)
-
+# Stream audio in real-time
+audio_stream = client.tts.stream(
+    text="This audio streams as it's generated",
+    latency="balanced"
+)
 
-asyncio.run(main())
+play(audio_stream)
 ```
 
-#### Reference Audio
+### Speech Recognition (ASR)
 
 ```python
-from fish_audio_sdk import TTSRequest
+from fishaudio import FishAudio
 
-TTSRequest(
-    text="Hello, world!",
-    reference_id="your_model_id",
-)
+client = FishAudio()
+
+# Transcribe audio to text
+with open("audio.wav", "rb") as f:
+    result = client.asr.transcribe(audio=f.read())
+    print(result.text)
 ```
 
-Or just use `ReferenceAudio` in `TTSRequest`:
+### List and Filter Voices
 
 ```python
-from fish_audio_sdk import TTSRequest, ReferenceAudio
-
-TTSRequest(
-    text="Hello, world!",
-    references=[
-        ReferenceAudio(
-            audio=audio_file.read(),
-            text="reference audio text",
-        )
-    ],
-)
-```
+from fishaudio import FishAudio
 
-### List models
+client = FishAudio()
 
-```python
-models = session.list_models()
-print(models)
+# List available voices
+voices = client.voices.list(language="en")
+
+for voice in voices:
+    print(f"{voice.title} - {voice.id}")
 ```
 
-Or use async version:
+### Async Usage
 
 ```python
 import asyncio
-
+from fishaudio import AsyncFishAudio
 
 async def main():
-    models = await session.list_models.awaitable()
-    print(models)
+    client = AsyncFishAudio()
 
+    audio = await client.tts.convert(text="Async text-to-speech!")
+    # Process audio...
 
 asyncio.run(main())
 ```
 
-
-
-### Get a model info by id
+### Check Account Credits
 
 ```python
-model = session.get_model("your_model_id")
-print(model)
+from fishaudio import FishAudio
+
+client = FishAudio()
+credits = client.account.get_credits()
+print(f"Remaining credits: {credits.credit}")
 ```
 
-Or use async version:
+[More examples in /examples directory](./examples/)
 
-```python
-import asyncio
+---
 
+## Documentation
 
-async def main():
-    model = await session.get_model.awaitable("your_model_id")
-    print(model)
+- [API Reference](https://docs.fish.audio) - Complete API documentation with all parameters and options
+- [Tutorials & Guides](https://docs.fish.audio) - Step-by-step tutorials for common use cases
+- [Examples](./examples/) - Sample code demonstrating various features
+- [Migration Guide](https://docs.fish.audio) - Guide for upgrading from the legacy SDK
 
+---
 
-asyncio.run(main())
-```
+## Requirements
 
-### Create a model
+- Python 3.9 or higher
+- Fish Audio API key - [Get one here](https://fish.audio)
 
-```python
-model = session.create_model(
-    title="test",
-    description="test",
-    voices=[voice_file.read(), other_voice_file.read()],
-    cover_image=image_file.read(),
-)
-print(model)
-```
+### Optional Dependencies
 
-Or use async version:
+For audio playback utilities:
 
-```python
-import asyncio
+```bash
+pip install fish-audio-sdk[utils]
+```
 
+This installs `sounddevice` and `soundfile` for the `play()` utility function.
 
-async def main():
-    model = await session.create_model.awaitable(
-        title="test",
-        description="test",
-        voices=[voice_file.read(), other_voice_file.read()],
-        cover_image=image_file.read(),
-    )
-    print(model)
+---
 
+## Community & Support
 
-asyncio.run(main())
-```
+- [Discord Community](https://fish.audio) - Join our community for discussions and support
+- [GitHub Issues](https://github.com/fishaudio/fish-audio-python/issues) - Report bugs or request features
+- [Documentation](https://docs.fish.audio) - Comprehensive guides and API reference
 
+---
 
-### Delete a model
+## License
 
-```python
-session.delete_model("your_model_id")
-```
+This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENSE) file for details.
 
-Or use async version:
-
-```python
-import asyncio
+---
 
+## Legacy SDK
 
-async def main():
-    await session.delete_model.awaitable("your_model_id")
+The legacy `fish_audio_sdk` module is still available for existing projects:
 
+```python
+from fish_audio_sdk import Session
 
-asyncio.run(main())
+session = Session("your_api_key")
 ```
+
+We recommend migrating to the new `fishaudio` module for new projects. See our [Migration Guide](https://docs.fish.audio) for assistance.

From 810386ed5994c18cd56f7286e3fa61073a27faca Mon Sep 17 00:00:00 2001
From: James Ding <jamesding365@gmail.com>
Date: Wed, 12 Nov 2025 15:22:03 -0600
Subject: [PATCH 02/16] docs: update README with new badge for Python version
 and improved build status link

---
 README.md | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index eddc4ec..202c53b 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,11 @@
 # Fish Audio Python SDK
 
+[![Official SDK](https://img.shields.io/badge/Fish_Audio-Official_SDK-21176d?logo=fishaudio&logoColor=fff&logoSize=auto)](https://fish.audio)
 [![PyPI version](https://badge.fury.io/py/fish-audio-sdk.svg)](https://badge.fury.io/py/fish-audio-sdk)
 [![PyPI - Downloads](https://img.shields.io/pypi/dm/fish-audio-sdk)](https://pypi.org/project/fish-audio-sdk/)
-[![Build Status](https://img.shields.io/github/actions/workflow/status/fishaudio/fish-audio-python/ci.yml?branch=main)](https://github.com/fishaudio/fish-audio-python/actions)
-[![codecov](https://codecov.io/gh/fishaudio/fish-audio-python/branch/main/graph/badge.svg)](https://codecov.io/gh/fishaudio/fish-audio-python)
-[![Python Version](https://img.shields.io/pypi/pyversions/fish-audio-sdk)](https://pypi.org/project/fish-audio-sdk/)
+[![Build Status](https://img.shields.io/github/actions/workflow/status/fishaudio/fish-audio-python/python.yml?branch=main)](https://github.com/fishaudio/fish-audio-python/actions)
+[![codecov](https://img.shields.io/codecov/c/github/fishaudio/fish-audio-python)](https://codecov.io/gh/fishaudio/fish-audio-python)
+[![Python Version](https://img.shields.io/badge/python-3.9+-blue)](https://pypi.org/project/fish-audio-sdk/)
 [![License](https://img.shields.io/github/license/fishaudio/fish-audio-python)](https://github.com/fishaudio/fish-audio-python/blob/main/LICENSE)
 
 The official Python library for the Fish Audio API - AI-powered text-to-speech, voice cloning, and speech recognition.

From bf2f5dbe991143bc01c0ad925d647c5ecbf2b0a2 Mon Sep 17 00:00:00 2001
From: James Ding <jamesding365@gmail.com>
Date: Wed, 12 Nov 2025 15:37:39 -0600
Subject: [PATCH 03/16] docs: update README badge to reflect Python SDK
 branding

---
 README.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 202c53b..57314e1 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,8 @@
 # Fish Audio Python SDK
 
-[![Official SDK](https://img.shields.io/badge/Fish_Audio-Official_SDK-21176d?logo=fishaudio&logoColor=fff&logoSize=auto)](https://fish.audio)
+[![Official SDK](https://img.shields.io/badge/Fish_Audio-Python_SDK-21176d?logo=fishaudio&logoColor=fff&logoSize=auto)](https://fish.audio)
 [![PyPI version](https://badge.fury.io/py/fish-audio-sdk.svg)](https://badge.fury.io/py/fish-audio-sdk)
 [![PyPI - Downloads](https://img.shields.io/pypi/dm/fish-audio-sdk)](https://pypi.org/project/fish-audio-sdk/)
-[![Build Status](https://img.shields.io/github/actions/workflow/status/fishaudio/fish-audio-python/python.yml?branch=main)](https://github.com/fishaudio/fish-audio-python/actions)
 [![codecov](https://img.shields.io/codecov/c/github/fishaudio/fish-audio-python)](https://codecov.io/gh/fishaudio/fish-audio-python)
 [![Python Version](https://img.shields.io/badge/python-3.9+-blue)](https://pypi.org/project/fish-audio-sdk/)
 [![License](https://img.shields.io/github/license/fishaudio/fish-audio-python)](https://github.com/fishaudio/fish-audio-python/blob/main/LICENSE)

From cc72716374831afd54ed49259d7f9d5103166d5c Mon Sep 17 00:00:00 2001
From: James Ding <jamesding365@gmail.com>
Date: Wed, 12 Nov 2025 15:43:25 -0600
Subject: [PATCH 04/16] docs: update README to remove redundant badge and
 simplify description

---
 README.md | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 57314e1..d07a39c 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,12 @@
 # Fish Audio Python SDK
 
-[![Official SDK](https://img.shields.io/badge/Fish_Audio-Python_SDK-21176d?logo=fishaudio&logoColor=fff&logoSize=auto)](https://fish.audio)
 [![PyPI version](https://badge.fury.io/py/fish-audio-sdk.svg)](https://badge.fury.io/py/fish-audio-sdk)
+[![Python Version](https://img.shields.io/badge/python-3.9+-blue)](https://pypi.org/project/fish-audio-sdk/)
 [![PyPI - Downloads](https://img.shields.io/pypi/dm/fish-audio-sdk)](https://pypi.org/project/fish-audio-sdk/)
 [![codecov](https://img.shields.io/codecov/c/github/fishaudio/fish-audio-python)](https://codecov.io/gh/fishaudio/fish-audio-python)
-[![Python Version](https://img.shields.io/badge/python-3.9+-blue)](https://pypi.org/project/fish-audio-sdk/)
 [![License](https://img.shields.io/github/license/fishaudio/fish-audio-python)](https://github.com/fishaudio/fish-audio-python/blob/main/LICENSE)
 
-The official Python library for the Fish Audio API - AI-powered text-to-speech, voice cloning, and speech recognition.
+The official Python library for the Fish Audio API.
 
 [Documentation](https://docs.fish.audio) | [API Reference](https://docs.fish.audio) | [Examples](./examples/) | [Discord](https://fish.audio)
 

From 62b955ce5f4f7f0078bc908cbccf9f0ab034f2be Mon Sep 17 00:00:00 2001
From: James Ding <jamesding365@gmail.com>
Date: Wed, 12 Nov 2025 21:58:04 -0600
Subject: [PATCH 05/16] docs: update README to reflect new API features and
 improve installation instructions

---
 README.md | 171 +++++++++++++++++++++++++-----------------------------
 1 file changed, 79 insertions(+), 92 deletions(-)

diff --git a/README.md b/README.md
index d07a39c..3ada700 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # Fish Audio Python SDK
 
-[![PyPI version](https://badge.fury.io/py/fish-audio-sdk.svg)](https://badge.fury.io/py/fish-audio-sdk)
+[![PyPI version](https://img.shields.io/pypi/v/fish-audio-sdk.svg)](https://badge.fury.io/py/fish-audio-sdk)
 [![Python Version](https://img.shields.io/badge/python-3.9+-blue)](https://pypi.org/project/fish-audio-sdk/)
 [![PyPI - Downloads](https://img.shields.io/pypi/dm/fish-audio-sdk)](https://pypi.org/project/fish-audio-sdk/)
 [![codecov](https://img.shields.io/codecov/c/github/fishaudio/fish-audio-python)](https://codecov.io/gh/fishaudio/fish-audio-python)
@@ -8,80 +8,81 @@
 
 The official Python library for the Fish Audio API.
 
-[Documentation](https://docs.fish.audio) | [API Reference](https://docs.fish.audio) | [Examples](./examples/) | [Discord](https://fish.audio)
+## Notice: New API Available
 
----
+The SDK now includes a modern `fishaudio` API with improved ergonomics, better type safety, and enhanced features.
 
-## Important: New API Available
+For new projects, use the `fishaudio` module. For existing projects using the legacy API, see the [Legacy SDK section](#legacy-sdk) below.
 
-> **We've released a major update to the Fish Audio Python SDK!**
->
-> The new API (`fishaudio` module) offers improved ergonomics, better type safety, and enhanced features. The legacy SDK (`fish_audio_sdk` module) continues to be supported for existing projects, but we recommend using the new API for all new development.
->
-> **Migration:** Both APIs are available in the same package. You can migrate at your own pace. See our [Migration Guide](https://docs.fish.audio) for details.
+## API Documentation
 
----
+See the Python API Documentation and Reference
 
-## Quick Start
+## Installation
 
-### Installation
+This package is available on PyPI:
 
 ```bash
 pip install fish-audio-sdk
 ```
 
-### Basic Usage
+You may install from source by running the following command in the repository root:
 
-```python
-from fishaudio import FishAudio
-from fishaudio.utils import save
-
-# Set your API key via environment variable: export FISH_AUDIO_API_KEY="your-api-key"
-# Or pass it directly: FishAudio(api_key="your-api-key")
-client = FishAudio()
-
-# Convert text to speech
-audio = client.tts.convert(text="Hello from Fish Audio!")
-save(audio, "output.mp3")
+```bash
+python -m pip install .
 ```
 
-[Get your API key](https://fish.audio) | [Full Getting Started Guide](https://docs.fish.audio)
+## Usage
 
----
+The client will need to be configured with an API key, which you can obtain from [Fish Audio](https://fish.audio/app/api-keys).
 
-## Key Features
+```python
+from fishaudio import FishAudio
 
-- **Text-to-Speech** - Natural-sounding voice synthesis with multiple voice options
-- **Voice Cloning** - Create custom voices using reference audio samples
-- **Real-time Streaming** - Low-latency audio generation via WebSocket connections
-- **Speech-to-Text (ASR)** - Accurate automatic speech recognition with language detection
-- **Voice Management** - Create, update, and organize custom voice models
-- **Sync and Async APIs** - Full support for both synchronous and asynchronous operations
-- **Type Safety** - Complete type hints with Pydantic models throughout
+client = FishAudio() # Automatically reads from the FISH_AUDIO_API_KEY environment variable
 
----
+client = FishAudio(api_key="your-api-key") # Or provide the API key directly
+```
 
-## Examples
+The SDK provides [text-to-speech](#text-to-speech), [voice cloning](#instant-voice-cloning), [speech recognition](#speech-recognition-asr), and [voice management](#voice-management) capabilities.
 
 ### Text-to-Speech
 
+Convert text to natural-sounding speech with support for multiple voices, formats, and real-time streaming.
+
+#### Basic
+
 ```python
 from fishaudio import FishAudio
-from fishaudio.utils import save
+from fishaudio.utils import save, play
 
 client = FishAudio()
-audio = client.tts.convert(text="Hello, world!")
-save(audio, "output.mp3")
+
+audio = client.tts.convert(text="Hello, world!") # Default voice and settings
+play(audio)  # Play audio directly
+
+audio = client.tts.convert(text="Welcome to Fish Audio SDK!")
+save(audio, "output.mp3") # You can also save to a file
 ```
 
-### Voice Cloning with Reference Audio
+#### With Reference Voice
+
+Use a reference voice ID to ensure consistent voice characteristics across generations:
 
 ```python
-from fishaudio import FishAudio
+# Use an existing voice by ID
+audio = client.tts.convert(
+    text="This will sound like the reference voice!",
+    reference_id="802e3bc2b27e49c2995d23ef70e6ac89" # Energetic Male
+)
+```
 
-client = FishAudio()
+#### Instant Voice Cloning
+
+Immediately clone a voice from a short audio sample:
 
-# Use a reference voice for cloning
+```python
+# Clone a voice from audio sample
 with open("reference.wav", "rb") as f:
     audio = client.tts.convert(
         text="This will sound like the reference voice!",
@@ -90,7 +91,9 @@ with open("reference.wav", "rb") as f:
     )
 ```
 
-### Real-time Streaming
+#### Real-time Streaming
+
+For low-latency and real-time applications, stream audio as text is processed:
 
 ```python
 from fishaudio import FishAudio
@@ -98,17 +101,20 @@ from fishaudio.utils import play
 
 client = FishAudio()
 
-# Stream audio in real-time
-audio_stream = client.tts.stream(
-    text="This audio streams as it's generated",
-    latency="balanced"
-)
+# Stream text chunks and receive audio in real-time
+def text_chunks():
+    yield "Hello, "
+    yield "this is "
+    yield "streaming audio!"
 
+audio_stream = client.tts.stream_websocket(text_chunks(), latency="balanced")
 play(audio_stream)
 ```
 
 ### Speech Recognition (ASR)
 
+To transcribe audio to text:
+
 ```python
 from fishaudio import FishAudio
 
@@ -120,7 +126,9 @@ with open("audio.wav", "rb") as f:
     print(result.text)
 ```
 
-### List and Filter Voices
+### Voice Management
+
+Manage voice references and list available voices.
 
 ```python
 from fishaudio import FishAudio
@@ -128,14 +136,24 @@ from fishaudio import FishAudio
 client = FishAudio()
 
 # List available voices
-voices = client.voices.list(language="en")
+voices = client.voices.list(language="en", tags="male")
+
+# Get a specific voice by ID
+voice = client.voices.get(voice_id="802e3bc2b27e49c2995d23ef70e6ac89")
 
-for voice in voices:
-    print(f"{voice.title} - {voice.id}")
+# Create a custom voice
+with open("voice_sample.wav", "rb") as f:
+    new_voice = client.voices.create(
+        title="My Custom Voice",
+        voices=[f.read()],
+        description="My cloned voice"
+    )
 ```
 
 ### Async Usage
 
+You can also use the SDK in asynchronous applications:
+
 ```python
 import asyncio
 from fishaudio import AsyncFishAudio
@@ -149,7 +167,9 @@ async def main():
 asyncio.run(main())
 ```
 
-### Check Account Credits
+### Account
+
+Check your remaining API credits, usage, and account details:
 
 ```python
 from fishaudio import FishAudio
@@ -159,53 +179,18 @@ credits = client.account.get_credits()
 print(f"Remaining credits: {credits.credit}")
 ```
 
-[More examples in /examples directory](./examples/)
-
----
-
-## Documentation
-
-- [API Reference](https://docs.fish.audio) - Complete API documentation with all parameters and options
-- [Tutorials & Guides](https://docs.fish.audio) - Step-by-step tutorials for common use cases
-- [Examples](./examples/) - Sample code demonstrating various features
-- [Migration Guide](https://docs.fish.audio) - Guide for upgrading from the legacy SDK
-
----
-
-## Requirements
-
-- Python 3.9 or higher
-- Fish Audio API key - [Get one here](https://fish.audio)
 
 ### Optional Dependencies
 
-For audio playback utilities:
+For audio playback utilities to help with playing and saving audio files, install the `utils` extra:
 
 ```bash
 pip install fish-audio-sdk[utils]
 ```
 
-This installs `sounddevice` and `soundfile` for the `play()` utility function.
-
----
-
-## Community & Support
-
-- [Discord Community](https://fish.audio) - Join our community for discussions and support
-- [GitHub Issues](https://github.com/fishaudio/fish-audio-python/issues) - Report bugs or request features
-- [Documentation](https://docs.fish.audio) - Comprehensive guides and API reference
-
----
-
-## License
-
-This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENSE) file for details.
-
----
-
 ## Legacy SDK
 
-The legacy `fish_audio_sdk` module is still available for existing projects:
+The legacy `fish_audio_sdk` module continues to be supported for existing projects:
 
 ```python
 from fish_audio_sdk import Session
@@ -213,4 +198,6 @@ from fish_audio_sdk import Session
 session = Session("your_api_key")
 ```
 
-We recommend migrating to the new `fishaudio` module for new projects. See our [Migration Guide](https://docs.fish.audio) for assistance.
+For complete legacy SDK documentation, see the [Legacy API Documentation](https://docs.fish.audio/legacy).
+
+We recommend migrating to the new `fishaudio` module - see our [Migration Guide](https://docs.fish.audio) for assistance.

From cc8416b971f845b25eebc46ea0ad99935598ad7d Mon Sep 17 00:00:00 2001
From: James Ding <jamesding365@gmail.com>
Date: Wed, 12 Nov 2025 22:04:43 -0600
Subject: [PATCH 06/16] docs: update README to provide direct links to Python
 SDK Guide and API Reference

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 3ada700..0acd0ab 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ For new projects, use the `fishaudio` module. For existing projects using the le
 
 ## API Documentation
 
-See the Python API Documentation and Reference
+For complete documentation and API reference, visit the [Python SDK Guide](https://docs.fish.audio/developer-guide/sdk-guide/python/) and [API Reference](https://docs.fish.audio/api-reference/sdk/python/).
 
 ## Installation
 

From 3cc4904e116374c0660b9c11e970804d1b3248d3 Mon Sep 17 00:00:00 2001
From: James Ding <jamesding365@gmail.com>
Date: Thu, 13 Nov 2025 00:04:53 -0600
Subject: [PATCH 07/16] docs: update title in copy_docs.py from "Python SDK" to
 "Overview"

---
 scripts/copy_docs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/copy_docs.py b/scripts/copy_docs.py
index a2587eb..ea82907 100644
--- a/scripts/copy_docs.py
+++ b/scripts/copy_docs.py
@@ -142,7 +142,7 @@ def copy_docs(sdk_root: Path, docs_root: Path) -> None:
         python_sdk_dir,
         lambda content: add_frontmatter(
             content,
-            title="Python SDK",
+            title="Overview",
             description="Fish Audio Python SDK for text-to-speech and voice cloning",
             icon="python",
         ),

From 27a99094887a78c8df64a7f3980759fc317d9b9e Mon Sep 17 00:00:00 2001
From: James Ding <jamesding365@gmail.com>
Date: Thu, 13 Nov 2025 00:39:16 -0600
Subject: [PATCH 08/16] chore: update development status to Production/Stable
 in pyproject.toml

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 8aa9dfb..49c8d9d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,7 +17,7 @@ readme = "README.md"
 license = {text = "Apache-2.0"}
 keywords = ["fish-audio", "tts", "text-to-speech", "voice-cloning", "ai", "speech-synthesis"]
 classifiers = [
-    "Development Status :: 4 - Beta",
+    "Development Status :: 5 - Production/Stable",
     "Intended Audience :: Developers",
     "License :: OSI Approved :: Apache Software License",
     "Programming Language :: Python :: 3",

From 6228352d673064b6eb58c113062a738a46884c88 Mon Sep 17 00:00:00 2001
From: James Ding <jamesding365@gmail.com>
Date: Thu, 13 Nov 2025 01:41:11 -0600
Subject: [PATCH 09/16] chore: rename environment variable from
 FISH_AUDIO_API_KEY to FISH_API_KEY across the codebase

---
 .env.example                                 | 2 +-
 .github/workflows/python.yml                 | 2 +-
 README.md                                    | 2 +-
 examples/README.md                           | 2 +-
 examples/getting-started/01_simple_tts.py    | 6 +++---
 examples/getting-started/02_play_audio.py    | 4 ++--
 examples/getting-started/03_check_credits.py | 6 +++---
 src/fishaudio/client.py                      | 4 ++--
 src/fishaudio/core/client_wrapper.py         | 4 ++--
 tests/integration/conftest.py                | 4 ++--
 tests/unit/test_client.py                    | 2 +-
 tests/unit/test_core.py                      | 2 +-
 12 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/.env.example b/.env.example
index 9ab9793..8ccad9e 100644
--- a/.env.example
+++ b/.env.example
@@ -1 +1 @@
-FISH_AUDIO_API_KEY=
\ No newline at end of file
+FISH_API_KEY=
\ No newline at end of file
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index 619f999..79de8c8 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -83,7 +83,7 @@ jobs:
       - name: Run integration tests
         run: uv run pytest tests/integration/ -v
         env:
-          FISH_AUDIO_API_KEY: ${{ secrets.FISH_AUDIO_API_KEY }}
+          FISH_API_KEY: ${{ secrets.FISH_API_KEY }}
 
       - name: Upload Test Artifacts
         uses: actions/upload-artifact@v4
diff --git a/README.md b/README.md
index 0acd0ab..b6814e6 100644
--- a/README.md
+++ b/README.md
@@ -39,7 +39,7 @@ The client will need to be configured with an API key, which you can obtain from
 ```python
 from fishaudio import FishAudio
 
-client = FishAudio() # Automatically reads from the FISH_AUDIO_API_KEY environment variable
+client = FishAudio() # Automatically reads from the FISH_API_KEY environment variable
 
 client = FishAudio(api_key="your-api-key") # Or provide the API key directly
 ```
diff --git a/examples/README.md b/examples/README.md
index cc7510d..1132cd1 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -5,5 +5,5 @@ Example scripts demonstrating how to use the Fish Audio Python SDK.
 ```bash
 # Install and setup
 pip install fishaudio
-export FISH_AUDIO_API_KEY="your_api_key"
+export FISH_API_KEY="your_api_key"
 ```
\ No newline at end of file
diff --git a/examples/getting-started/01_simple_tts.py b/examples/getting-started/01_simple_tts.py
index f312848..2574ce7 100644
--- a/examples/getting-started/01_simple_tts.py
+++ b/examples/getting-started/01_simple_tts.py
@@ -10,7 +10,7 @@
     pip install fishaudio
 
 Environment Setup:
-    export FISH_AUDIO_API_KEY="your_api_key_here"
+    export FISH_API_KEY="your_api_key_here"
     # Or pass api_key directly to the client
 
 Expected Output:
@@ -25,7 +25,7 @@
 
 def main():
     # Initialize the client with your API key
-    # Option 1: Use environment variable FISH_AUDIO_API_KEY
+    # Option 1: Use environment variable FISH_API_KEY
     # Option 2: Pass api_key directly: FishAudio(api_key="your_key")
     client = FishAudio()
 
@@ -52,4 +52,4 @@ def main():
     except Exception as e:
         print(f"Error: {e}")
         print("\nMake sure you have set your API key:")
-        print("  export FISH_AUDIO_API_KEY='your_api_key'")
+        print("  export FISH_API_KEY='your_api_key'")
diff --git a/examples/getting-started/02_play_audio.py b/examples/getting-started/02_play_audio.py
index 5b62750..34c8e29 100644
--- a/examples/getting-started/02_play_audio.py
+++ b/examples/getting-started/02_play_audio.py
@@ -19,7 +19,7 @@
     #   pip install sounddevice soundfile
 
 Environment Setup:
-    export FISH_AUDIO_API_KEY="your_api_key_here"
+    export FISH_API_KEY="your_api_key_here"
 
 Expected Output:
     - Plays the generated audio through your speakers
@@ -98,7 +98,7 @@ def demo_playback_methods():
     except Exception as e:
         print(f"Error: {e}")
         print("\nTroubleshooting:")
-        print("1. Make sure your API key is set: export FISH_AUDIO_API_KEY='your_key'")
+        print("1. Make sure your API key is set: export FISH_API_KEY='your_key'")
         print("2. Install ffmpeg for audio playback:")
         print("   - macOS: brew install ffmpeg")
         print("   - Ubuntu: sudo apt install ffmpeg")
diff --git a/examples/getting-started/03_check_credits.py b/examples/getting-started/03_check_credits.py
index 68fc721..87412ec 100644
--- a/examples/getting-started/03_check_credits.py
+++ b/examples/getting-started/03_check_credits.py
@@ -13,7 +13,7 @@
     pip install fishaudio
 
 Environment Setup:
-    export FISH_AUDIO_API_KEY="your_api_key_here"
+    export FISH_API_KEY="your_api_key_here"
 
 Expected Output:
     - Displays account credit balance
@@ -84,7 +84,7 @@ def check_api_setup():
         print(f"  Error: {e}")
         print("\nPlease check:")
         print("  1. Your API key is correct")
-        print("  2. Environment variable is set: export FISH_AUDIO_API_KEY='your_key'")
+        print("  2. Environment variable is set: export FISH_API_KEY='your_key'")
         print("  3. You have an active internet connection")
         return False
 
@@ -100,6 +100,6 @@ def check_api_setup():
     except Exception as e:
         print(f"\nError: {e}")
         print("\nMake sure you have set your API key:")
-        print("  export FISH_AUDIO_API_KEY='your_api_key'")
+        print("  export FISH_API_KEY='your_api_key'")
         print("\nOr pass it directly when creating the client:")
         print("  client = FishAudio(api_key='your_api_key')")
diff --git a/src/fishaudio/client.py b/src/fishaudio/client.py
index 5a914cf..53be1ec 100644
--- a/src/fishaudio/client.py
+++ b/src/fishaudio/client.py
@@ -51,7 +51,7 @@ def __init__(
         Initialize Fish Audio client.
 
         Args:
-            api_key: API key (can also use FISH_AUDIO_API_KEY env var)
+            api_key: API key (can also use FISH_API_KEY env var)
             base_url: API base URL
             timeout: Request timeout in seconds
             httpx_client: Optional custom HTTP client
@@ -145,7 +145,7 @@ def __init__(
         Initialize async Fish Audio client.
 
         Args:
-            api_key: API key (can also use FISH_AUDIO_API_KEY env var)
+            api_key: API key (can also use FISH_API_KEY env var)
             base_url: API base URL
             timeout: Request timeout in seconds
             httpx_client: Optional custom async HTTP client
diff --git a/src/fishaudio/core/client_wrapper.py b/src/fishaudio/core/client_wrapper.py
index 2173f28..f1232f7 100644
--- a/src/fishaudio/core/client_wrapper.py
+++ b/src/fishaudio/core/client_wrapper.py
@@ -53,10 +53,10 @@ def __init__(
         api_key: Optional[str] = None,
         base_url: str = "https://api.fish.audio",
     ):
-        self.api_key = api_key or os.getenv("FISH_AUDIO_API_KEY")
+        self.api_key = api_key or os.getenv("FISH_API_KEY")
         if not self.api_key:
             raise ValueError(
-                "API key must be provided either as argument or via FISH_AUDIO_API_KEY environment variable"
+                "API key must be provided either as argument or via FISH_API_KEY environment variable"
             )
         self.base_url = base_url
 
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
index 00ec2d8..c2b2094 100644
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@@ -21,9 +21,9 @@
 @pytest.fixture
 def api_key():
     """Get API key from environment."""
-    key = os.getenv("FISH_AUDIO_API_KEY")
+    key = os.getenv("FISH_API_KEY")
     if not key:
-        pytest.skip("No API key available (set FISH_AUDIO_API_KEY)")
+        pytest.skip("No API key available (set FISH_API_KEY)")
     return key
 
 
diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py
index f1aa2c0..d288491 100644
--- a/tests/unit/test_client.py
+++ b/tests/unit/test_client.py
@@ -20,7 +20,7 @@ def test_init_with_api_key(self, mock_api_key):
         assert client._client_wrapper.api_key == mock_api_key
 
     def test_init_with_env_var(self, mock_api_key):
-        with patch.dict("os.environ", {"FISH_AUDIO_API_KEY": mock_api_key}):
+        with patch.dict("os.environ", {"FISH_API_KEY": mock_api_key}):
             client = FishAudio()
             assert client._client_wrapper.api_key == mock_api_key
 
diff --git a/tests/unit/test_core.py b/tests/unit/test_core.py
index 76a3611..f77dc04 100644
--- a/tests/unit/test_core.py
+++ b/tests/unit/test_core.py
@@ -67,7 +67,7 @@ def test_init_without_api_key_raises(self):
                 ClientWrapper()
 
     def test_init_with_env_var(self, mock_api_key):
-        with patch.dict("os.environ", {"FISH_AUDIO_API_KEY": mock_api_key}):
+        with patch.dict("os.environ", {"FISH_API_KEY": mock_api_key}):
             wrapper = ClientWrapper()
             assert wrapper.api_key == mock_api_key
 

From b5b525838ba0fdd7535704c1324d7f74aa88eff8 Mon Sep 17 00:00:00 2001
From: James Ding <jamesding365@gmail.com>
Date: Thu, 13 Nov 2025 03:44:44 -0600
Subject: [PATCH 10/16] feat: add streaming support for text-to-speech with
 AudioStream and AsyncAudioStream classes

---
 README.md                                 |  18 +-
 src/fishaudio/__init__.py                 |   4 +
 src/fishaudio/core/iterators.py           | 113 ++++++++++
 src/fishaudio/resources/tts.py            | 244 ++++++++++++++--------
 tests/integration/conftest.py             |  11 +-
 tests/integration/test_asr_integration.py |  12 +-
 tests/integration/test_tts_integration.py |  67 +++---
 tests/unit/test_tts.py                    | 179 ++++++++--------
 8 files changed, 417 insertions(+), 231 deletions(-)
 create mode 100644 src/fishaudio/core/iterators.py

diff --git a/README.md b/README.md
index b6814e6..c57d150 100644
--- a/README.md
+++ b/README.md
@@ -91,9 +91,23 @@ with open("reference.wav", "rb") as f:
     )
 ```
 
-#### Real-time Streaming
+#### Streaming Audio Chunks
 
-For low-latency and real-time applications, stream audio as text is processed:
+For processing audio chunks as they're generated:
+
+```python
+# Stream and process audio chunks
+for chunk in client.tts.stream(text="Long text content..."):
+    # Process each chunk as it arrives
+    send_to_websocket(chunk)
+
+# Or collect all chunks
+audio = client.tts.stream(text="Hello!").collect()
+```
+
+#### Real-time WebSocket Streaming
+
+For low-latency bidirectional streaming where you send text chunks and receive audio in real-time:
 
 ```python
 from fishaudio import FishAudio
diff --git a/src/fishaudio/__init__.py b/src/fishaudio/__init__.py
index bf33f15..dcedf83 100644
--- a/src/fishaudio/__init__.py
+++ b/src/fishaudio/__init__.py
@@ -28,6 +28,7 @@
 
 from ._version import __version__
 from .client import AsyncFishAudio, FishAudio
+from .core.iterators import AsyncAudioStream, AudioStream
 from .exceptions import (
     APIError,
     AuthenticationError,
@@ -52,6 +53,9 @@
     "play",
     "save",
     "stream",
+    # Audio streams
+    "AudioStream",
+    "AsyncAudioStream",
     # Types
     "FlushEvent",
     "TextEvent",
diff --git a/src/fishaudio/core/iterators.py b/src/fishaudio/core/iterators.py
new file mode 100644
index 0000000..971618a
--- /dev/null
+++ b/src/fishaudio/core/iterators.py
@@ -0,0 +1,113 @@
+"""Audio stream wrappers with collection utilities."""
+
+from typing import AsyncIterator, Iterator
+
+
+class AudioStream:
+    """Wrapper for sync audio byte streams with collection utilities.
+
+    This class wraps an iterator of audio bytes and provides a convenient
+    `.collect()` method to gather all chunks into a single bytes object.
+
+    Examples:
+        ```python
+        from fishaudio import FishAudio
+
+        client = FishAudio(api_key="...")
+
+        # Collect all audio at once
+        audio = client.tts.stream(text="Hello!").collect()
+
+        # Or stream chunks manually
+        for chunk in client.tts.stream(text="Hello!"):
+            process_chunk(chunk)
+        ```
+    """
+
+    def __init__(self, iterator: Iterator[bytes]):
+        """Initialize the audio iterator wrapper.
+
+        Args:
+            iterator: The underlying iterator of audio bytes
+        """
+        self._iter = iterator
+
+    def __iter__(self) -> Iterator[bytes]:
+        """Allow direct iteration over audio chunks."""
+        return self._iter
+
+    def collect(self) -> bytes:
+        """Collect all audio chunks into a single bytes object.
+
+        This consumes the iterator and returns all audio data as bytes.
+        After calling this method, the iterator cannot be used again.
+
+        Returns:
+            Complete audio data as bytes
+
+        Examples:
+            ```python
+            audio = client.tts.stream(text="Hello!").collect()
+            with open("output.mp3", "wb") as f:
+                f.write(audio)
+            ```
+        """
+        chunks = []
+        for chunk in self._iter:
+            chunks.append(chunk)
+        return b"".join(chunks)
+
+
+class AsyncAudioStream:
+    """Wrapper for async audio byte streams with collection utilities.
+
+    This class wraps an async iterator of audio bytes and provides a convenient
+    `.collect()` method to gather all chunks into a single bytes object.
+
+    Examples:
+        ```python
+        from fishaudio import AsyncFishAudio
+
+        client = AsyncFishAudio(api_key="...")
+
+        # Collect all audio at once
+        audio = await (await client.tts.stream(text="Hello!")).collect()
+
+        # Or stream chunks manually
+        async for chunk in await client.tts.stream(text="Hello!"):
+            await process_chunk(chunk)
+        ```
+    """
+
+    def __init__(self, async_iterator: AsyncIterator[bytes]):
+        """Initialize the async audio iterator wrapper.
+
+        Args:
+            async_iterator: The underlying async iterator of audio bytes
+        """
+        self._iter = async_iterator
+
+    def __aiter__(self) -> AsyncIterator[bytes]:
+        """Allow direct async iteration over audio chunks."""
+        return self._iter
+
+    async def collect(self) -> bytes:
+        """Collect all audio chunks into a single bytes object.
+
+        This consumes the async iterator and returns all audio data as bytes.
+        After calling this method, the iterator cannot be used again.
+
+        Returns:
+            Complete audio data as bytes
+
+        Examples:
+            ```python
+            audio = await (await client.tts.stream(text="Hello!")).collect()
+            with open("output.mp3", "wb") as f:
+                f.write(audio)
+            ```
+        """
+        chunks = []
+        async for chunk in self._iter:
+            chunks.append(chunk)
+        return b"".join(chunks)
diff --git a/src/fishaudio/resources/tts.py b/src/fishaudio/resources/tts.py
index fef1cd4..578b676 100644
--- a/src/fishaudio/resources/tts.py
+++ b/src/fishaudio/resources/tts.py
@@ -9,6 +9,7 @@
 
 from .realtime import aiter_websocket_audio, iter_websocket_audio
 from ..core import AsyncClientWrapper, ClientWrapper, RequestOptions
+from ..core.iterators import AsyncAudioStream, AudioStream
 from ..types import (
     AudioFormat,
     CloseEvent,
@@ -58,7 +59,7 @@ class TTSClient:
     def __init__(self, client_wrapper: ClientWrapper):
         self._client = client_wrapper
 
-    def convert(
+    def stream(
         self,
         *,
         text: str,
@@ -70,9 +71,9 @@ def convert(
         config: TTSConfig = TTSConfig(),
         model: Model = "s1",
         request_options: Optional[RequestOptions] = None,
-    ) -> Iterator[bytes]:
+    ) -> AudioStream:
         """
-        Convert text to speech.
+        Stream text-to-speech audio chunks.
 
         Args:
             text: Text to synthesize
@@ -86,48 +87,20 @@ def convert(
             request_options: Request-level overrides
 
         Returns:
-            Iterator of audio bytes
+            AudioStream object that can be iterated for audio chunks
 
         Example:
             ```python
-            from fishaudio import FishAudio, TTSConfig, ReferenceAudio
+            from fishaudio import FishAudio
 
             client = FishAudio(api_key="...")
 
-            # Simple usage with defaults
-            audio = client.tts.convert(text="Hello world")
-
-            # With format parameter
-            audio = client.tts.convert(text="Hello world", format="wav")
-
-            # With speed parameter
-            audio = client.tts.convert(text="Hello world", speed=1.5)
-
-            # With reference_id parameter
-            audio = client.tts.convert(text="Hello world", reference_id="your_model_id")
-
-            # With references parameter
-            audio = client.tts.convert(
-                text="Hello world",
-                references=[ReferenceAudio(audio=audio_bytes, text="sample")]
-            )
-
-            # Combine multiple parameters
-            audio = client.tts.convert(
-                text="Hello world",
-                format="wav",
-                speed=1.2,
-                latency="normal"
-            )
-
-            # Parameters override config values
-            config = TTSConfig(format="mp3", prosody=Prosody(speed=1.0))
-            audio = client.tts.convert(text="Hello world", format="wav", config=config)
-            # Result: format="wav" (parameter wins)
+            # Stream and process chunks
+            for chunk in client.tts.stream(text="Hello world"):
+                process_audio_chunk(chunk)
 
-            with open("output.mp3", "wb") as f:
-                for chunk in audio:
-                    f.write(chunk)
+            # Or collect all at once
+            audio = client.tts.stream(text="Hello world").collect()
             ```
         """
         # Build request payload from config
@@ -160,10 +133,75 @@ def convert(
             request_options=request_options,
         )
 
-        # Stream response chunks
-        for chunk in response.iter_bytes():
-            if chunk:
-                yield chunk
+        # Create generator and wrap with AudioStream
+        def _stream():
+            for chunk in response.iter_bytes():
+                if chunk:
+                    yield chunk
+
+        return AudioStream(_stream())
+
+    def convert(
+        self,
+        *,
+        text: str,
+        reference_id: Optional[str] = None,
+        references: Optional[List[ReferenceAudio]] = None,
+        format: Optional[AudioFormat] = None,
+        latency: Optional[LatencyMode] = None,
+        speed: Optional[float] = None,
+        config: TTSConfig = TTSConfig(),
+        model: Model = "s1",
+        request_options: Optional[RequestOptions] = None,
+    ) -> bytes:
+        """
+        Convert text to speech and return complete audio as bytes.
+
+        This is a convenience method that streams all audio chunks and combines them.
+        For chunk-by-chunk processing, use stream() instead.
+
+        Args:
+            text: Text to synthesize
+            reference_id: Voice reference ID (overrides config.reference_id if provided)
+            references: Reference audio samples (overrides config.references if provided)
+            format: Audio format - "mp3", "wav", "pcm", or "opus" (overrides config.format if provided)
+            latency: Latency mode - "normal" or "balanced" (overrides config.latency if provided)
+            speed: Speech speed multiplier, e.g. 1.5 for 1.5x speed (overrides config.prosody.speed if provided)
+            config: TTS configuration (audio settings, voice, model parameters)
+            model: TTS model to use
+            request_options: Request-level overrides
+
+        Returns:
+            Complete audio as bytes
+
+        Example:
+            ```python
+            from fishaudio import FishAudio
+            from fishaudio.utils import play, save
+
+            client = FishAudio(api_key="...")
+
+            # Get complete audio
+            audio = client.tts.convert(text="Hello world")
+
+            # Play it
+            play(audio)
+
+            # Or save it
+            save(audio, "output.mp3")
+            ```
+        """
+        return self.stream(
+            text=text,
+            reference_id=reference_id,
+            references=references,
+            format=format,
+            latency=latency,
+            speed=speed,
+            config=config,
+            model=model,
+            request_options=request_options,
+        ).collect()
 
     def stream_websocket(
         self,
@@ -307,7 +345,7 @@ class AsyncTTSClient:
     def __init__(self, client_wrapper: AsyncClientWrapper):
         self._client = client_wrapper
 
-    async def convert(
+    async def stream(
         self,
         *,
         text: str,
@@ -319,9 +357,9 @@ async def convert(
         config: TTSConfig = TTSConfig(),
         model: Model = "s1",
         request_options: Optional[RequestOptions] = None,
-    ):
+    ) -> AsyncAudioStream:
         """
-        Convert text to speech (async).
+        Stream text-to-speech audio chunks (async).
 
         Args:
             text: Text to synthesize
@@ -335,48 +373,20 @@ async def convert(
             request_options: Request-level overrides
 
         Returns:
-            Async iterator of audio bytes
+            AsyncAudioStream object that can be iterated for audio chunks
 
         Example:
             ```python
-            from fishaudio import AsyncFishAudio, TTSConfig, ReferenceAudio
+            from fishaudio import AsyncFishAudio
 
             client = AsyncFishAudio(api_key="...")
 
-            # Simple usage with defaults
-            audio = await client.tts.convert(text="Hello world")
-
-            # With format parameter
-            audio = await client.tts.convert(text="Hello world", format="wav")
-
-            # With speed parameter
-            audio = await client.tts.convert(text="Hello world", speed=1.5)
-
-            # With reference_id parameter
-            audio = await client.tts.convert(text="Hello world", reference_id="your_model_id")
-
-            # With references parameter
-            audio = await client.tts.convert(
-                text="Hello world",
-                references=[ReferenceAudio(audio=audio_bytes, text="sample")]
-            )
-
-            # Combine multiple parameters
-            audio = await client.tts.convert(
-                text="Hello world",
-                format="wav",
-                speed=1.2,
-                latency="normal"
-            )
-
-            # Parameters override config values
-            config = TTSConfig(format="mp3", prosody=Prosody(speed=1.0))
-            audio = await client.tts.convert(text="Hello world", format="wav", config=config)
-            # Result: format="wav" (parameter wins)
+            # Stream and process chunks
+            async for chunk in await client.tts.stream(text="Hello world"):
+                await process_audio_chunk(chunk)
 
-            async with aiofiles.open("output.mp3", "wb") as f:
-                async for chunk in audio:
-                    await f.write(chunk)
+            # Or collect all at once
+            audio = await (await client.tts.stream(text="Hello world")).collect()
             ```
         """
         # Build request payload from config
@@ -409,10 +419,76 @@ async def convert(
             request_options=request_options,
         )
 
-        # Stream response chunks
-        async for chunk in response.aiter_bytes():
-            if chunk:
-                yield chunk
+        # Create async generator and wrap with AsyncAudioStream
+        async def _stream():
+            async for chunk in response.aiter_bytes():
+                if chunk:
+                    yield chunk
+
+        return AsyncAudioStream(_stream())
+
+    async def convert(
+        self,
+        *,
+        text: str,
+        reference_id: Optional[str] = None,
+        references: Optional[List[ReferenceAudio]] = None,
+        format: Optional[AudioFormat] = None,
+        latency: Optional[LatencyMode] = None,
+        speed: Optional[float] = None,
+        config: TTSConfig = TTSConfig(),
+        model: Model = "s1",
+        request_options: Optional[RequestOptions] = None,
+    ) -> bytes:
+        """
+        Convert text to speech and return complete audio as bytes (async).
+
+        This is a convenience method that streams all audio chunks and combines them.
+        For chunk-by-chunk processing, use stream() instead.
+
+        Args:
+            text: Text to synthesize
+            reference_id: Voice reference ID (overrides config.reference_id if provided)
+            references: Reference audio samples (overrides config.references if provided)
+            format: Audio format - "mp3", "wav", "pcm", or "opus" (overrides config.format if provided)
+            latency: Latency mode - "normal" or "balanced" (overrides config.latency if provided)
+            speed: Speech speed multiplier, e.g. 1.5 for 1.5x speed (overrides config.prosody.speed if provided)
+            config: TTS configuration (audio settings, voice, model parameters)
+            model: TTS model to use
+            request_options: Request-level overrides
+
+        Returns:
+            Complete audio as bytes
+
+        Example:
+            ```python
+            from fishaudio import AsyncFishAudio
+            from fishaudio.utils import play, save
+
+            client = AsyncFishAudio(api_key="...")
+
+            # Get complete audio
+            audio = await client.tts.convert(text="Hello world")
+
+            # Play it
+            play(audio)
+
+            # Or save it
+            save(audio, "output.mp3")
+            ```
+        """
+        stream = await self.stream(
+            text=text,
+            reference_id=reference_id,
+            references=references,
+            format=format,
+            latency=latency,
+            speed=speed,
+            config=config,
+            model=model,
+            request_options=request_options,
+        )
+        return await stream.collect()
 
     async def stream_websocket(
         self,
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
index c2b2094..7cc0ef1 100644
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@@ -55,17 +55,20 @@ def save_audio():
         A callable that takes audio chunks and filename and saves to output/
     """
 
-    def _save(audio_chunks: list[bytes], filename: str) -> Path:
-        """Save audio chunks to output directory.
+    def _save(audio: bytes | list[bytes], filename: str) -> Path:
+        """Save audio to output directory.
 
         Args:
-            audio_chunks: List of audio byte chunks
+            audio: Audio bytes or list of audio byte chunks
             filename: Name of the output file (including extension)
 
         Returns:
             Path to the saved file
         """
-        complete_audio = b"".join(audio_chunks)
+        if isinstance(audio, bytes):
+            complete_audio = audio
+        else:
+            complete_audio = b"".join(audio)
         output_file = OUTPUT_DIR / filename
         output_file.write_bytes(complete_audio)
         return output_file
diff --git a/tests/integration/test_asr_integration.py b/tests/integration/test_asr_integration.py
index 953f7c8..7e2602e 100644
--- a/tests/integration/test_asr_integration.py
+++ b/tests/integration/test_asr_integration.py
@@ -13,10 +13,7 @@ def sample_audio(self, client):
         """Generate sample audio for ASR testing."""
         # Generate audio from known text
         config = TTSConfig(format="wav")
-        audio_chunks = list(
-            client.tts.convert(text="Hello world, this is a test.", config=config)
-        )
-        return b"".join(audio_chunks)
+        return client.tts.convert(text="Hello world, this is a test.", config=config)
 
     def test_basic_asr(self, client, sample_audio):
         """Test basic speech-to-text transcription."""
@@ -54,13 +51,8 @@ class TestAsyncASRIntegration:
     @pytest.fixture
     async def async_sample_audio(self, async_client):
         """Generate sample audio for async ASR testing."""
-        audio_chunks = []
         config = TTSConfig(format="wav")
-        async for chunk in async_client.tts.convert(
-            text="Async test audio", config=config
-        ):
-            audio_chunks.append(chunk)
-        return b"".join(audio_chunks)
+        return await async_client.tts.convert(text="Async test audio", config=config)
 
     @pytest.mark.asyncio
     async def test_async_basic_asr(self, async_client, async_sample_audio):
diff --git a/tests/integration/test_tts_integration.py b/tests/integration/test_tts_integration.py
index 8d00d77..f6b4fc2 100644
--- a/tests/integration/test_tts_integration.py
+++ b/tests/integration/test_tts_integration.py
@@ -13,15 +13,13 @@ class TestTTSIntegration:
 
     def test_basic_tts(self, client, save_audio):
         """Test basic text-to-speech generation."""
-        audio_chunks = list(client.tts.convert(text="Hello, this is a test."))
+        audio = client.tts.convert(text="Hello, this is a test.")
 
-        assert len(audio_chunks) > 0
-        # Verify we got audio data (check for common audio headers)
-        complete_audio = b"".join(audio_chunks)
-        assert len(complete_audio) > 1000  # Should have substantial audio data
+        assert len(audio) > 1000  # Should have substantial audio data
+        assert isinstance(audio, bytes)
 
         # Write to output directory
-        save_audio(audio_chunks, "test_basic_tts.mp3")
+        save_audio(audio, "test_basic_tts.mp3")
 
     def test_tts_with_different_formats(self, client, save_audio):
         """Test TTS with different audio formats."""
@@ -29,27 +27,23 @@ def test_tts_with_different_formats(self, client, save_audio):
 
         for fmt in formats:
             config = TTSConfig(format=fmt, chunk_length=100)
-            audio_chunks = list(
-                client.tts.convert(text=f"Testing format {fmt}", config=config)
-            )
-            assert len(audio_chunks) > 0, f"Failed for format: {fmt}"
+            audio = client.tts.convert(text=f"Testing format {fmt}", config=config)
+            assert len(audio) > 0, f"Failed for format: {fmt}"
 
             # Write to output directory
-            save_audio(audio_chunks, f"test_format_{fmt}.{fmt}")
+            save_audio(audio, f"test_format_{fmt}.{fmt}")
 
     def test_tts_with_prosody(self, client, save_audio):
         """Test TTS with prosody settings."""
         prosody = Prosody(speed=1.2, volume=0.5)
         config = TTSConfig(prosody=prosody)
 
-        audio_chunks = list(
-            client.tts.convert(text="Testing prosody settings", config=config)
-        )
+        audio = client.tts.convert(text="Testing prosody settings", config=config)
 
-        assert len(audio_chunks) > 0
+        assert len(audio) > 0
 
         # Write to output directory
-        save_audio(audio_chunks, "test_prosody.mp3")
+        save_audio(audio, "test_prosody.mp3")
 
     def test_tts_with_different_models(self, client, save_audio):
         """Test TTS with different models."""
@@ -57,13 +51,11 @@ def test_tts_with_different_models(self, client, save_audio):
 
         for model in models:
             try:
-                audio_chunks = list(
-                    client.tts.convert(text=f"Testing model {model}", model=model)
-                )
-                assert len(audio_chunks) > 0, f"Failed for model: {model}"
+                audio = client.tts.convert(text=f"Testing model {model}", model=model)
+                assert len(audio) > 0, f"Failed for model: {model}"
 
                 # Write to output directory
-                save_audio(audio_chunks, f"test_model_{model}.mp3")
+                save_audio(audio, f"test_model_{model}.mp3")
             except Exception as e:
                 # Some models might not be available
                 pytest.skip(f"Model {model} not available: {e}")
@@ -73,23 +65,21 @@ def test_tts_longer_text(self, client, save_audio):
         long_text = "This is a longer piece of text for testing. " * 10
         config = TTSConfig(chunk_length=200)
 
-        audio_chunks = list(client.tts.convert(text=long_text, config=config))
+        audio = client.tts.convert(text=long_text, config=config)
 
-        assert len(audio_chunks) > 0
-        complete_audio = b"".join(audio_chunks)
         # Longer text should produce more audio
-        assert len(complete_audio) > 5000
+        assert len(audio) > 5000
 
         # Write to output directory
-        save_audio(audio_chunks, "test_longer_text.mp3")
+        save_audio(audio, "test_longer_text.mp3")
 
     def test_tts_empty_text_should_fail(self, client):
         """Test that empty text is handled."""
         # This might succeed with silence or fail - test behavior
         try:
-            audio_chunks = list(client.tts.convert(text=""))
+            audio = client.tts.convert(text="")
             # If it succeeds, verify we get something
-            assert len(audio_chunks) >= 0
+            assert isinstance(audio, bytes)
         except Exception:
             # If it fails, that's also acceptable
             pass
@@ -101,16 +91,13 @@ class TestAsyncTTSIntegration:
     @pytest.mark.asyncio
     async def test_basic_async_tts(self, async_client, save_audio):
         """Test basic async text-to-speech generation."""
-        audio_chunks = []
-        async for chunk in async_client.tts.convert(text="Hello from async"):
-            audio_chunks.append(chunk)
+        audio = await async_client.tts.convert(text="Hello from async")
 
-        assert len(audio_chunks) > 0
-        complete_audio = b"".join(audio_chunks)
-        assert len(complete_audio) > 1000
+        assert len(audio) > 1000
+        assert isinstance(audio, bytes)
 
         # Write to output directory
-        save_audio(audio_chunks, "test_async_basic.mp3")
+        save_audio(audio, "test_async_basic.mp3")
 
     @pytest.mark.asyncio
     async def test_async_tts_with_prosody(self, async_client, save_audio):
@@ -118,13 +105,9 @@ async def test_async_tts_with_prosody(self, async_client, save_audio):
         prosody = Prosody(speed=0.8, volume=-0.2)
         config = TTSConfig(prosody=prosody)
 
-        audio_chunks = []
-        async for chunk in async_client.tts.convert(
-            text="Async prosody test", config=config
-        ):
-            audio_chunks.append(chunk)
+        audio = await async_client.tts.convert(text="Async prosody test", config=config)
 
-        assert len(audio_chunks) > 0
+        assert len(audio) > 0
 
         # Write to output directory
-        save_audio(audio_chunks, "test_async_prosody.mp3")
+        save_audio(audio, "test_async_prosody.mp3")
diff --git a/tests/unit/test_tts.py b/tests/unit/test_tts.py
index 6ddff60..47bfb06 100644
--- a/tests/unit/test_tts.py
+++ b/tests/unit/test_tts.py
@@ -40,15 +40,15 @@ def async_tts_client(async_mock_client_wrapper):
 class TestTTSClient:
     """Test synchronous TTSClient."""
 
-    def test_convert_basic(self, tts_client, mock_client_wrapper):
-        """Test basic TTS conversion."""
+    def test_stream_basic(self, tts_client, mock_client_wrapper):
+        """Test basic TTS streaming."""
         # Setup mock response with audio chunks
         mock_response = Mock()
         mock_response.iter_bytes.return_value = iter([b"chunk1", b"chunk2", b"chunk3"])
         mock_client_wrapper.request.return_value = mock_response
 
-        # Call convert
-        audio_chunks = list(tts_client.convert(text="Hello world"))
+        # Call stream
+        audio_chunks = list(tts_client.stream(text="Hello world"))
 
         # Verify we got chunks back
         assert audio_chunks == [b"chunk1", b"chunk2", b"chunk3"]
@@ -67,6 +67,23 @@ def test_convert_basic(self, tts_client, mock_client_wrapper):
         # Check payload was msgpack encoded
         assert "content" in call_args[1]
 
+    def test_convert_basic(self, tts_client, mock_client_wrapper):
+        """Test basic TTS conversion returns bytes."""
+        # Setup mock response with audio chunks
+        mock_response = Mock()
+        mock_response.iter_bytes.return_value = iter([b"chunk1", b"chunk2", b"chunk3"])
+        mock_client_wrapper.request.return_value = mock_response
+
+        # Call convert
+        audio = tts_client.convert(text="Hello world")
+
+        # Verify we got complete audio as bytes
+        assert audio == b"chunk1chunk2chunk3"
+        assert isinstance(audio, bytes)
+
+        # Verify request was made correctly
+        mock_client_wrapper.request.assert_called_once()
+
     def test_convert_with_reference_id(self, tts_client, mock_client_wrapper):
         """Test TTS with reference voice ID."""
         mock_response = Mock()
@@ -74,7 +91,7 @@ def test_convert_with_reference_id(self, tts_client, mock_client_wrapper):
         mock_client_wrapper.request.return_value = mock_response
 
         config = TTSConfig(reference_id="voice_123")
-        list(tts_client.convert(text="Hello", config=config))
+        tts_client.convert(text="Hello", config=config)
 
         # Verify reference_id in payload
         call_args = mock_client_wrapper.request.call_args
@@ -87,7 +104,7 @@ def test_convert_with_reference_id_parameter(self, tts_client, mock_client_wrapp
         mock_response.iter_bytes.return_value = iter([b"audio"])
         mock_client_wrapper.request.return_value = mock_response
 
-        list(tts_client.convert(text="Hello", reference_id="voice_456"))
+        tts_client.convert(text="Hello", reference_id="voice_456")
 
         # Verify reference_id in payload
         call_args = mock_client_wrapper.request.call_args
@@ -103,11 +120,7 @@ def test_convert_parameter_reference_id_overrides_config(
         mock_client_wrapper.request.return_value = mock_response
 
         config = TTSConfig(reference_id="voice_from_config")
-        list(
-            tts_client.convert(
-                text="Hello", reference_id="voice_from_param", config=config
-            )
-        )
+        tts_client.convert(text="Hello", reference_id="voice_from_param", config=config)
 
         # Verify parameter reference_id takes precedence
         call_args = mock_client_wrapper.request.call_args
@@ -126,7 +139,7 @@ def test_convert_with_references(self, tts_client, mock_client_wrapper):
         ]
 
         config = TTSConfig(references=references)
-        list(tts_client.convert(text="Hello", config=config))
+        tts_client.convert(text="Hello", config=config)
 
         # Verify references in payload
         call_args = mock_client_wrapper.request.call_args
@@ -146,7 +159,7 @@ def test_convert_with_references_parameter(self, tts_client, mock_client_wrapper
             ReferenceAudio(audio=b"ref_audio_2", text="Sample 2"),
         ]
 
-        list(tts_client.convert(text="Hello", references=references))
+        tts_client.convert(text="Hello", references=references)
 
         # Verify references in payload
         call_args = mock_client_wrapper.request.call_args
@@ -167,7 +180,7 @@ def test_convert_parameter_references_overrides_config(
         param_refs = [ReferenceAudio(audio=b"param_audio", text="Param")]
 
         config = TTSConfig(references=config_refs)
-        list(tts_client.convert(text="Hello", references=param_refs, config=config))
+        tts_client.convert(text="Hello", references=param_refs, config=config)
 
         # Verify parameter references take precedence
         call_args = mock_client_wrapper.request.call_args
@@ -181,7 +194,7 @@ def test_convert_with_different_backend(self, tts_client, mock_client_wrapper):
         mock_response.iter_bytes.return_value = iter([b"audio"])
         mock_client_wrapper.request.return_value = mock_response
 
-        list(tts_client.convert(text="Hello", model="s1"))
+        tts_client.convert(text="Hello", model="s1")
 
         # Verify model in headers
         call_args = mock_client_wrapper.request.call_args
@@ -196,7 +209,7 @@ def test_convert_with_prosody(self, tts_client, mock_client_wrapper):
         prosody = Prosody(speed=1.5, volume=0.5)
         config = TTSConfig(prosody=prosody)
 
-        list(tts_client.convert(text="Hello", config=config))
+        tts_client.convert(text="Hello", config=config)
 
         # Verify prosody in payload
         call_args = mock_client_wrapper.request.call_args
@@ -221,7 +234,7 @@ def test_convert_with_custom_parameters(self, tts_client, mock_client_wrapper):
             temperature=0.8,
         )
 
-        list(tts_client.convert(text="Hello", config=config))
+        tts_client.convert(text="Hello", config=config)
 
         # Verify parameters in payload
         call_args = mock_client_wrapper.request.call_args
@@ -242,7 +255,7 @@ def test_convert_omit_parameters_not_sent(self, tts_client, mock_client_wrapper)
         mock_client_wrapper.request.return_value = mock_response
 
         # Call with defaults (None values should be excluded)
-        list(tts_client.convert(text="Hello"))
+        tts_client.convert(text="Hello")
 
         # Verify None params not in payload
         call_args = mock_client_wrapper.request.call_args
@@ -266,14 +279,14 @@ def test_convert_with_request_options(self, tts_client, mock_client_wrapper):
             timeout=120.0, additional_headers={"X-Custom": "value"}
         )
 
-        list(tts_client.convert(text="Hello", request_options=request_options))
+        tts_client.convert(text="Hello", request_options=request_options)
 
         # Verify request_options passed through
         call_args = mock_client_wrapper.request.call_args
         assert call_args[1]["request_options"] == request_options
 
-    def test_convert_streaming_behavior(self, tts_client, mock_client_wrapper):
-        """Test that convert returns an iterator that can be consumed."""
+    def test_stream_behavior(self, tts_client, mock_client_wrapper):
+        """Test that stream returns an iterator that can be consumed."""
         # Setup mock with multiple chunks
         mock_response = Mock()
         chunks = [b"chunk1", b"chunk2", b"chunk3", b""]  # Empty chunk should be skipped
@@ -281,11 +294,11 @@ def test_convert_streaming_behavior(self, tts_client, mock_client_wrapper):
         mock_client_wrapper.request.return_value = mock_response
 
         # Get iterator
-        audio_iterator = tts_client.convert(text="Hello")
+        audio_stream = tts_client.stream(text="Hello")
 
         # Consume one chunk at a time
         result = []
-        for chunk in audio_iterator:
+        for chunk in audio_stream:
             result.append(chunk)
 
         # Empty chunk should be filtered out
@@ -297,9 +310,9 @@ def test_convert_empty_response(self, tts_client, mock_client_wrapper):
         mock_response.iter_bytes.return_value = iter([])
         mock_client_wrapper.request.return_value = mock_response
 
-        audio_chunks = list(tts_client.convert(text="Hello"))
+        audio = tts_client.convert(text="Hello")
 
-        assert audio_chunks == []
+        assert audio == b""
 
     def test_convert_with_format_parameter(self, tts_client, mock_client_wrapper):
         """Test TTS with format as direct parameter."""
@@ -307,7 +320,7 @@ def test_convert_with_format_parameter(self, tts_client, mock_client_wrapper):
         mock_response.iter_bytes.return_value = iter([b"audio"])
         mock_client_wrapper.request.return_value = mock_response
 
-        list(tts_client.convert(text="Hello", format="wav"))
+        tts_client.convert(text="Hello", format="wav")
 
         # Verify format in payload
         call_args = mock_client_wrapper.request.call_args
@@ -320,7 +333,7 @@ def test_convert_with_opus_format(self, tts_client, mock_client_wrapper):
         mock_response.iter_bytes.return_value = iter([b"audio"])
         mock_client_wrapper.request.return_value = mock_response
 
-        list(tts_client.convert(text="Hello", format="opus"))
+        tts_client.convert(text="Hello", format="opus")
 
         # Verify opus format in payload
         call_args = mock_client_wrapper.request.call_args
@@ -333,7 +346,7 @@ def test_convert_with_latency_parameter(self, tts_client, mock_client_wrapper):
         mock_response.iter_bytes.return_value = iter([b"audio"])
         mock_client_wrapper.request.return_value = mock_response
 
-        list(tts_client.convert(text="Hello", latency="normal"))
+        tts_client.convert(text="Hello", latency="normal")
 
         # Verify latency in payload
         call_args = mock_client_wrapper.request.call_args
@@ -346,7 +359,7 @@ def test_convert_with_speed_parameter(self, tts_client, mock_client_wrapper):
         mock_response.iter_bytes.return_value = iter([b"audio"])
         mock_client_wrapper.request.return_value = mock_response
 
-        list(tts_client.convert(text="Hello", speed=1.5))
+        tts_client.convert(text="Hello", speed=1.5)
 
         # Verify speed creates prosody in payload
         call_args = mock_client_wrapper.request.call_args
@@ -362,7 +375,7 @@ def test_convert_parameter_format_overrides_config(
         mock_client_wrapper.request.return_value = mock_response
 
         config = TTSConfig(format="wav")
-        list(tts_client.convert(text="Hello", format="pcm", config=config))
+        tts_client.convert(text="Hello", format="pcm", config=config)
 
         # Verify parameter format takes precedence
         call_args = mock_client_wrapper.request.call_args
@@ -378,7 +391,7 @@ def test_convert_parameter_speed_overrides_config_prosody(
         mock_client_wrapper.request.return_value = mock_response
 
         config = TTSConfig(prosody=Prosody(speed=2.0, volume=0.5))
-        list(tts_client.convert(text="Hello", speed=1.5, config=config))
+        tts_client.convert(text="Hello", speed=1.5, config=config)
 
         # Verify parameter speed takes precedence but volume is preserved
         call_args = mock_client_wrapper.request.call_args
@@ -410,8 +423,8 @@ class TestAsyncTTSClient:
     """Test asynchronous AsyncTTSClient."""
 
     @pytest.mark.asyncio
-    async def test_convert_basic(self, async_tts_client, async_mock_client_wrapper):
-        """Test basic async TTS conversion."""
+    async def test_stream_basic(self, async_tts_client, async_mock_client_wrapper):
+        """Test basic async TTS streaming."""
         # Setup mock response
         mock_response = Mock()
 
@@ -422,9 +435,10 @@ async def async_iter_bytes():
         mock_response.aiter_bytes = async_iter_bytes
         async_mock_client_wrapper.request = AsyncMock(return_value=mock_response)
 
-        # Call convert and collect chunks
+        # Call stream and collect chunks
         audio_chunks = []
-        async for chunk in async_tts_client.convert(text="Hello world"):
+        stream = await async_tts_client.stream(text="Hello world")
+        async for chunk in stream:
             audio_chunks.append(chunk)
 
         assert audio_chunks == [b"chunk1", b"chunk2", b"chunk3"]
@@ -436,6 +450,29 @@ async def async_iter_bytes():
         assert call_args[0][0] == "POST"
         assert call_args[0][1] == "/v1/tts"
 
+    @pytest.mark.asyncio
+    async def test_convert_basic(self, async_tts_client, async_mock_client_wrapper):
+        """Test basic async TTS conversion returns bytes."""
+        # Setup mock response
+        mock_response = Mock()
+
+        async def async_iter_bytes():
+            for chunk in [b"chunk1", b"chunk2", b"chunk3"]:
+                yield chunk
+
+        mock_response.aiter_bytes = async_iter_bytes
+        async_mock_client_wrapper.request = AsyncMock(return_value=mock_response)
+
+        # Call convert
+        audio = await async_tts_client.convert(text="Hello world")
+
+        # Verify we got complete audio as bytes
+        assert audio == b"chunk1chunk2chunk3"
+        assert isinstance(audio, bytes)
+
+        # Verify request was made
+        async_mock_client_wrapper.request.assert_called_once()
+
     @pytest.mark.asyncio
     async def test_convert_with_reference_id(
         self, async_tts_client, async_mock_client_wrapper
@@ -450,9 +487,7 @@ async def async_iter_bytes():
         async_mock_client_wrapper.request = AsyncMock(return_value=mock_response)
 
         config = TTSConfig(reference_id="voice_123")
-        audio_chunks = []
-        async for chunk in async_tts_client.convert(text="Hello", config=config):
-            audio_chunks.append(chunk)
+        await async_tts_client.convert(text="Hello", config=config)
 
         # Verify reference_id in payload
         call_args = async_mock_client_wrapper.request.call_args
@@ -472,11 +507,7 @@ async def async_iter_bytes():
         mock_response.aiter_bytes = async_iter_bytes
         async_mock_client_wrapper.request = AsyncMock(return_value=mock_response)
 
-        audio_chunks = []
-        async for chunk in async_tts_client.convert(
-            text="Hello", reference_id="voice_456"
-        ):
-            audio_chunks.append(chunk)
+        await async_tts_client.convert(text="Hello", reference_id="voice_456")
 
         # Verify reference_id in payload
         call_args = async_mock_client_wrapper.request.call_args
@@ -497,11 +528,9 @@ async def async_iter_bytes():
         async_mock_client_wrapper.request = AsyncMock(return_value=mock_response)
 
         config = TTSConfig(reference_id="voice_from_config")
-        audio_chunks = []
-        async for chunk in async_tts_client.convert(
+        await async_tts_client.convert(
             text="Hello", reference_id="voice_from_param", config=config
-        ):
-            audio_chunks.append(chunk)
+        )
 
         # Verify parameter reference_id takes precedence
         call_args = async_mock_client_wrapper.request.call_args
@@ -526,11 +555,7 @@ async def async_iter_bytes():
             ReferenceAudio(audio=b"ref_audio_2", text="Sample 2"),
         ]
 
-        audio_chunks = []
-        async for chunk in async_tts_client.convert(
-            text="Hello", references=references
-        ):
-            audio_chunks.append(chunk)
+        await async_tts_client.convert(text="Hello", references=references)
 
         # Verify references in payload
         call_args = async_mock_client_wrapper.request.call_args
@@ -556,11 +581,9 @@ async def async_iter_bytes():
         param_refs = [ReferenceAudio(audio=b"param_audio", text="Param")]
 
         config = TTSConfig(references=config_refs)
-        audio_chunks = []
-        async for chunk in async_tts_client.convert(
+        await async_tts_client.convert(
             text="Hello", references=param_refs, config=config
-        ):
-            audio_chunks.append(chunk)
+        )
 
         # Verify parameter references take precedence
         call_args = async_mock_client_wrapper.request.call_args
@@ -584,9 +607,7 @@ async def async_iter_bytes():
         prosody = Prosody(speed=2.0, volume=1.0)
         config = TTSConfig(prosody=prosody)
 
-        audio_chunks = []
-        async for chunk in async_tts_client.convert(text="Hello", config=config):
-            audio_chunks.append(chunk)
+        await async_tts_client.convert(text="Hello", config=config)
 
         # Verify prosody in payload
         call_args = async_mock_client_wrapper.request.call_args
@@ -607,9 +628,7 @@ async def async_iter_bytes():
         mock_response.aiter_bytes = async_iter_bytes
         async_mock_client_wrapper.request = AsyncMock(return_value=mock_response)
 
-        audio_chunks = []
-        async for chunk in async_tts_client.convert(text="Hello"):
-            audio_chunks.append(chunk)
+        await async_tts_client.convert(text="Hello")
 
         # Verify OMIT params not in payload
         call_args = async_mock_client_wrapper.request.call_args
@@ -633,11 +652,9 @@ async def async_iter_bytes():
         mock_response.aiter_bytes = async_iter_bytes
         async_mock_client_wrapper.request = AsyncMock(return_value=mock_response)
 
-        audio_chunks = []
-        async for chunk in async_tts_client.convert(text="Hello"):
-            audio_chunks.append(chunk)
+        audio = await async_tts_client.convert(text="Hello")
 
-        assert audio_chunks == []
+        assert audio == b""
 
     @pytest.mark.asyncio
     async def test_convert_with_format_parameter(
@@ -652,9 +669,7 @@ async def async_iter_bytes():
         mock_response.aiter_bytes = async_iter_bytes
         async_mock_client_wrapper.request = AsyncMock(return_value=mock_response)
 
-        audio_chunks = []
-        async for chunk in async_tts_client.convert(text="Hello", format="wav"):
-            audio_chunks.append(chunk)
+        await async_tts_client.convert(text="Hello", format="wav")
 
         # Verify format in payload
         call_args = async_mock_client_wrapper.request.call_args
@@ -674,9 +689,7 @@ async def async_iter_bytes():
         mock_response.aiter_bytes = async_iter_bytes
         async_mock_client_wrapper.request = AsyncMock(return_value=mock_response)
 
-        audio_chunks = []
-        async for chunk in async_tts_client.convert(text="Hello", latency="normal"):
-            audio_chunks.append(chunk)
+        await async_tts_client.convert(text="Hello", latency="normal")
 
         # Verify latency in payload
         call_args = async_mock_client_wrapper.request.call_args
@@ -696,9 +709,7 @@ async def async_iter_bytes():
         mock_response.aiter_bytes = async_iter_bytes
         async_mock_client_wrapper.request = AsyncMock(return_value=mock_response)
 
-        audio_chunks = []
-        async for chunk in async_tts_client.convert(text="Hello", speed=1.5):
-            audio_chunks.append(chunk)
+        await async_tts_client.convert(text="Hello", speed=1.5)
 
         # Verify speed creates prosody in payload
         call_args = async_mock_client_wrapper.request.call_args
@@ -719,11 +730,7 @@ async def async_iter_bytes():
         async_mock_client_wrapper.request = AsyncMock(return_value=mock_response)
 
         config = TTSConfig(format="wav")
-        audio_chunks = []
-        async for chunk in async_tts_client.convert(
-            text="Hello", format="pcm", config=config
-        ):
-            audio_chunks.append(chunk)
+        await async_tts_client.convert(text="Hello", format="pcm", config=config)
 
         # Verify parameter format takes precedence
         call_args = async_mock_client_wrapper.request.call_args
@@ -744,11 +751,7 @@ async def async_iter_bytes():
         async_mock_client_wrapper.request = AsyncMock(return_value=mock_response)
 
         config = TTSConfig(prosody=Prosody(speed=2.0, volume=0.5))
-        audio_chunks = []
-        async for chunk in async_tts_client.convert(
-            text="Hello", speed=1.5, config=config
-        ):
-            audio_chunks.append(chunk)
+        await async_tts_client.convert(text="Hello", speed=1.5, config=config)
 
         # Verify parameter speed takes precedence but volume is preserved
         call_args = async_mock_client_wrapper.request.call_args
@@ -769,11 +772,9 @@ async def async_iter_bytes():
         mock_response.aiter_bytes = async_iter_bytes
         async_mock_client_wrapper.request = AsyncMock(return_value=mock_response)
 
-        audio_chunks = []
-        async for chunk in async_tts_client.convert(
+        await async_tts_client.convert(
             text="Hello", format="wav", speed=1.3, latency="normal"
-        ):
-            audio_chunks.append(chunk)
+        )
 
         # Verify all parameters in payload
         call_args = async_mock_client_wrapper.request.call_args

From 80349873ad7851acc644228a85064f68aafccd41 Mon Sep 17 00:00:00 2001
From: James Ding <jamesding365@gmail.com>
Date: Thu, 13 Nov 2025 19:47:02 -0600
Subject: [PATCH 11/16] docs: enhance docstrings for models with detailed
 attribute descriptions

---
 src/fishaudio/types/account.py | 25 ++++++++++-
 src/fishaudio/types/asr.py     | 16 ++++++-
 src/fishaudio/types/shared.py  |  9 +++-
 src/fishaudio/types/tts.py     | 77 +++++++++++++++++++++++++++++++---
 src/fishaudio/types/voices.py  | 42 +++++++++++++++++--
 5 files changed, 154 insertions(+), 15 deletions(-)

diff --git a/src/fishaudio/types/account.py b/src/fishaudio/types/account.py
index 2803383..966ade5 100644
--- a/src/fishaudio/types/account.py
+++ b/src/fishaudio/types/account.py
@@ -7,7 +7,17 @@
 
 
 class Credits(BaseModel):
-    """User's API credit balance."""
+    """User's API credit balance.
+
+    Attributes:
+        id: Unique credits record identifier
+        user_id: User identifier
+        credit: Current credit balance (decimal for precise accounting)
+        created_at: Timestamp when the credits record was created
+        updated_at: Timestamp when the credits were last updated
+        has_phone_sha256: Whether the user has a verified phone number. Optional
+        has_free_credit: Whether the user has received free credits. Optional
+    """
 
     model_config = ConfigDict(populate_by_name=True)
 
@@ -21,7 +31,18 @@ class Credits(BaseModel):
 
 
 class Package(BaseModel):
-    """User's prepaid package information."""
+    """User's prepaid package information.
+
+    Attributes:
+        id: Unique package identifier
+        user_id: User identifier
+        type: Package type identifier
+        total: Total units in the package
+        balance: Remaining units in the package
+        created_at: Timestamp when the package was purchased
+        updated_at: Timestamp when the package was last updated
+        finished_at: Timestamp when the package was fully consumed. None if still active
+    """
 
     model_config = ConfigDict(populate_by_name=True)
 
diff --git a/src/fishaudio/types/asr.py b/src/fishaudio/types/asr.py
index 84d2dbb..db73916 100644
--- a/src/fishaudio/types/asr.py
+++ b/src/fishaudio/types/asr.py
@@ -6,7 +6,13 @@
 
 
 class ASRSegment(BaseModel):
-    """A timestamped segment of transcribed text."""
+    """A timestamped segment of transcribed text.
+
+    Attributes:
+        text: The transcribed text for this segment
+        start: Segment start time in seconds
+        end: Segment end time in seconds
+    """
 
     text: str
     start: float
@@ -14,7 +20,13 @@ class ASRSegment(BaseModel):
 
 
 class ASRResponse(BaseModel):
-    """Response from speech-to-text transcription."""
+    """Response from speech-to-text transcription.
+
+    Attributes:
+        text: Complete transcription of the entire audio
+        duration: Total audio duration in milliseconds
+        segments: List of timestamped text segments. Empty if include_timestamps=False
+    """
 
     text: str
     duration: float  # Duration in milliseconds
diff --git a/src/fishaudio/types/shared.py b/src/fishaudio/types/shared.py
index df7ab4a..1e756d9 100644
--- a/src/fishaudio/types/shared.py
+++ b/src/fishaudio/types/shared.py
@@ -9,7 +9,12 @@
 
 
 class PaginatedResponse(BaseModel, Generic[T]):
-    """Generic paginated response."""
+    """Generic paginated response.
+
+    Attributes:
+        total: Total number of items across all pages
+        items: List of items on the current page
+    """
 
     total: int
     items: List[T]
@@ -25,7 +30,7 @@ class PaginatedResponse(BaseModel, Generic[T]):
 Visibility = Literal["public", "unlist", "private"]
 
 # Training mode types
-TrainMode = Literal["fast", "full"]
+TrainMode = Literal["fast"]
 
 # Model state types
 ModelState = Literal["created", "training", "trained", "failed"]
diff --git a/src/fishaudio/types/tts.py b/src/fishaudio/types/tts.py
index 4dd7671..8b0923a 100644
--- a/src/fishaudio/types/tts.py
+++ b/src/fishaudio/types/tts.py
@@ -8,14 +8,27 @@
 
 
 class ReferenceAudio(BaseModel):
-    """Reference audio for voice cloning/style."""
+    """Reference audio for voice cloning/style.
+
+    Attributes:
+        audio: Audio file bytes for the reference sample
+        text: Transcription of what is spoken in the reference audio. Should match exactly
+            what's spoken and include punctuation for proper prosody.
+    """
 
     audio: bytes
     text: str
 
 
 class Prosody(BaseModel):
-    """Speech prosody settings (speed and volume)."""
+    """Speech prosody settings (speed and volume).
+
+    Attributes:
+        speed: Speech speed multiplier. Range: 0.5-2.0. Default: 1.0.
+            Examples: 1.5 = 50% faster, 0.8 = 20% slower
+        volume: Volume adjustment in decibels. Range: -20.0 to 20.0. Default: 0.0 (no change).
+            Positive values increase volume, negative values decrease it.
+    """
 
     speed: Annotated[float, Field(ge=0.5, le=2.0)] = 1.0
     volume: Annotated[float, Field(ge=-20.0, le=20.0)] = 0.0
@@ -45,6 +58,23 @@ class TTSConfig(BaseModel):
 
     Reusable configuration for text-to-speech requests. Create once, use multiple times.
     All parameters have sensible defaults.
+
+    Attributes:
+        format: Audio output format. Options: "mp3", "wav", "pcm", "opus". Default: "mp3"
+        sample_rate: Audio sample rate in Hz. If None, uses format-specific default.
+        mp3_bitrate: MP3 bitrate in kbps. Options: 64, 128, 192. Default: 128
+        opus_bitrate: Opus bitrate in kbps. Options: -1000, 24, 32, 48, 64. Default: 32
+        normalize: Whether to normalize/clean the input text. Default: True
+        chunk_length: Characters per generation chunk. Range: 100-300. Default: 200.
+            Lower values = faster initial response, higher values = better quality
+        latency: Generation mode. Options: "normal" (higher quality), "balanced" (faster). Default: "balanced"
+        reference_id: Voice model ID from fish.audio (e.g., "802e3bc2b27e49c2995d23ef70e6ac89").
+            Find IDs in voice URLs or via voices.list()
+        references: List of reference audio samples for instant voice cloning. Default: []
+        prosody: Speech speed and volume settings. Default: None (uses natural prosody)
+        top_p: Nucleus sampling parameter for token selection. Range: 0.0-1.0. Default: 0.7
+        temperature: Randomness in generation. Range: 0.0-1.0. Default: 0.7.
+            Higher = more varied, lower = more consistent
     """
 
     # Audio output settings
@@ -74,6 +104,21 @@ class TTSRequest(BaseModel):
 
     This model is used internally for WebSocket streaming.
     For the HTTP API, parameters are passed directly to methods.
+
+    Attributes:
+        text: Text to synthesize into speech
+        chunk_length: Characters per generation chunk. Range: 100-300. Default: 200
+        format: Audio output format. Options: "mp3", "wav", "pcm", "opus". Default: "mp3"
+        sample_rate: Audio sample rate in Hz. If None, uses format-specific default
+        mp3_bitrate: MP3 bitrate in kbps. Options: 64, 128, 192. Default: 128
+        opus_bitrate: Opus bitrate in kbps. Options: -1000, 24, 32, 48, 64. Default: 32
+        references: List of reference audio samples for voice cloning. Default: []
+        reference_id: Voice model ID for using a specific voice. Default: None
+        normalize: Whether to normalize/clean the input text. Default: True
+        latency: Generation mode. Options: "normal", "balanced". Default: "balanced"
+        prosody: Speech speed and volume settings. Default: None
+        top_p: Nucleus sampling for token selection. Range: 0.0-1.0. Default: 0.7
+        temperature: Randomness in generation. Range: 0.0-1.0. Default: 0.7
     """
 
     text: str
@@ -93,26 +138,46 @@ class TTSRequest(BaseModel):
 
 # WebSocket event types for streaming TTS
 class StartEvent(BaseModel):
-    """WebSocket start event."""
+    """WebSocket start event to initiate TTS streaming.
+
+    Attributes:
+        event: Event type identifier, always "start"
+        request: TTS configuration for the streaming session
+    """
 
     event: Literal["start"] = "start"
     request: TTSRequest
 
 
 class TextEvent(BaseModel):
-    """WebSocket text chunk event."""
+    """WebSocket event to send a text chunk for synthesis.
+
+    Attributes:
+        event: Event type identifier, always "text"
+        text: Text chunk to synthesize
+    """
 
     event: Literal["text"] = "text"
     text: str
 
 
 class FlushEvent(BaseModel):
-    """WebSocket flush event - forces buffer to generate audio immediately."""
+    """WebSocket event to force immediate audio generation from buffered text.
+
+    Use this to ensure all buffered text is synthesized without waiting for more input.
+
+    Attributes:
+        event: Event type identifier, always "flush"
+    """
 
     event: Literal["flush"] = "flush"
 
 
 class CloseEvent(BaseModel):
-    """WebSocket close event."""
+    """WebSocket event to end the streaming session.
+
+    Attributes:
+        event: Event type identifier, always "stop"
+    """
 
     event: Literal["stop"] = "stop"
diff --git a/src/fishaudio/types/voices.py b/src/fishaudio/types/voices.py
index 90e41b2..04f7570 100644
--- a/src/fishaudio/types/voices.py
+++ b/src/fishaudio/types/voices.py
@@ -9,7 +9,14 @@
 
 
 class Sample(BaseModel):
-    """A sample audio for a voice model."""
+    """A sample audio for a voice model.
+
+    Attributes:
+        title: Title/name of the audio sample
+        text: Transcription of the spoken content in the sample
+        task_id: Unique identifier for the sample task
+        audio: URL or path to the audio file
+    """
 
     title: str
     text: str
@@ -18,7 +25,13 @@ class Sample(BaseModel):
 
 
 class Author(BaseModel):
-    """Voice model author information."""
+    """Voice model author information.
+
+    Attributes:
+        id: Unique author identifier
+        nickname: Author's display name
+        avatar: URL to author's avatar image
+    """
 
     id: str = Field(alias="_id")
     nickname: str
@@ -27,9 +40,32 @@ class Author(BaseModel):
 
 class Voice(BaseModel):
     """
-    A voice model
+    A voice model.
 
     Represents a TTS voice that can be used for synthesis.
+
+    Attributes:
+        id: Unique voice model identifier (use as reference_id in TTS)
+        type: Model type. Options: "svc" (singing voice conversion), "tts" (text-to-speech)
+        title: Voice model title/name
+        description: Detailed description of the voice model
+        cover_image: URL to the voice model's cover image
+        train_mode: Training mode used. Options: "fast"
+        state: Current model state. Options: "created", "training", "trained", "failed"
+        tags: List of tags for categorization (e.g., ["male", "english", "young"])
+        samples: List of audio samples demonstrating the voice
+        created_at: Timestamp when the model was created
+        updated_at: Timestamp when the model was last updated
+        languages: List of supported language codes (e.g., ["en", "zh"])
+        visibility: Model visibility. Options: "public", "unlist", "private"
+        lock_visibility: Whether visibility setting is locked
+        like_count: Number of likes the model has received
+        mark_count: Number of bookmarks/favorites
+        shared_count: Number of times the model has been shared
+        task_count: Number of times the model has been used for generation
+        liked: Whether the current user has liked this model. Default: False
+        marked: Whether the current user has bookmarked this model. Default: False
+        author: Information about the voice model's creator
     """
 
     id: str = Field(alias="_id")

From 1ea482947236d421216ae0e76d84c5b2abec34ef Mon Sep 17 00:00:00 2001
From: James Ding <jamesding365@gmail.com>
Date: Thu, 13 Nov 2025 19:52:31 -0600
Subject: [PATCH 12/16] docs: update legacy SDK documentation links in
 README.md

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index c57d150..356be16 100644
--- a/README.md
+++ b/README.md
@@ -212,6 +212,6 @@ from fish_audio_sdk import Session
 session = Session("your_api_key")
 ```
 
-For complete legacy SDK documentation, see the [Legacy API Documentation](https://docs.fish.audio/legacy).
+For complete legacy SDK documentation, see the [Legacy API Documentation](https://docs.fish.audio/archive/python-sdk-legacy).
 
-We recommend migrating to the new `fishaudio` module - see our [Migration Guide](https://docs.fish.audio) for assistance.
+We recommend migrating to the new `fishaudio` module - see our [Migration Guide](https://docs.fish.audio/archive/python-sdk-legacy/migration-guide) for assistance.

From c68c1801ce7e7f4eaadaed86382caf5a94c2cad4 Mon Sep 17 00:00:00 2001
From: James Ding <jamesding365@gmail.com>
Date: Thu, 13 Nov 2025 19:58:33 -0600
Subject: [PATCH 13/16] docs: update README.md to reflect new API features and
 usage examples

---
 README.md | 273 ++++++++++++++++++++++++++++++------------------------
 1 file changed, 153 insertions(+), 120 deletions(-)

diff --git a/README.md b/README.md
index 356be16..1bb11a7 100644
--- a/README.md
+++ b/README.md
@@ -6,212 +6,245 @@
 [![codecov](https://img.shields.io/codecov/c/github/fishaudio/fish-audio-python)](https://codecov.io/gh/fishaudio/fish-audio-python)
 [![License](https://img.shields.io/github/license/fishaudio/fish-audio-python)](https://github.com/fishaudio/fish-audio-python/blob/main/LICENSE)
 
-The official Python library for the Fish Audio API.
+The official Python library for the Fish Audio API.
 
-## Notice: New API Available
+**Documentation:** [Python SDK Guide](https://docs.fish.audio/developer-guide/sdk-guide/python/) | [API Reference](https://docs.fish.audio/api-reference/sdk/python/)
 
-The SDK now includes a modern `fishaudio` API with improved ergonomics, better type safety, and enhanced features.
-
-For new projects, use the `fishaudio` module. For existing projects using the legacy API, see the [Legacy SDK section](#legacy-sdk) below
-
-## API Documentation
-
-For complete documentation and API reference, visit the [Python SDK Guide](https://docs.fish.audio/developer-guide/sdk-guide/python/) and [API Reference](https://docs.fish.audio/api-reference/sdk/python/).
+> **Note:** If you're using the legacy `fish_audio_sdk` API, see the [migration guide](https://docs.fish.audio/archive/python-sdk-legacy/migration-guide) to upgrade.
 
 ## Installation
 
-This package is available on PyPI:
-
 ```bash
 pip install fish-audio-sdk
+
+# With audio playback utilities
+pip install fish-audio-sdk[utils]
 ```
 
-You may install from source by running the following command in the repository root:
+## Authentication
+
+Get your API key from [fish.audio/app/api-keys](https://fish.audio/app/api-keys) and set it as the `FISH_API_KEY` environment variable:
 
 ```bash
-python -m pip install .
+export FISH_API_KEY=your_api_key_here
 ```
 
-## Usage
-
-The client will need to be configured with an API key, which you can obtain from [Fish Audio](https://fish.audio/app/api-keys).
+Or provide it directly when creating the client:
 
 ```python
 from fishaudio import FishAudio
 
-client = FishAudio() # Automatically reads from the FISH_API_KEY environment variable
-
-client = FishAudio(api_key="your-api-key") # Or provide the API key directly
+client = FishAudio(api_key="your_api_key")
 ```
 
-The SDK provides [text-to-speech](#text-to-speech), [voice cloning](#instant-voice-cloning), [speech recognition](#speech-recognition-asr), and [voice management](#voice-management) capabilities.
-
-### Text-to-Speech
-
-Convert text to natural-sounding speech with support for multiple voices, formats, and real-time streaming.
+## Quick Start
 
-#### Basic
+**Synchronous:**
 
 ```python
 from fishaudio import FishAudio
-from fishaudio.utils import save, play
+from fishaudio.utils import play, save
 
 client = FishAudio()
 
-audio = client.tts.convert(text="Hello, world!") # Default voice and settings
-play(audio)  # Play audio directly
+# Generate audio
+audio = client.tts.convert(text="Hello, world!")
 
-audio = client.tts.convert(text="Welcome to Fish Audio SDK!")
-save(audio, "output.mp3") # You can also save to a file
+# Play or save
+play(audio)
+save(audio, "output.mp3")
+```
+
+**Asynchronous:**
+
+```python
+import asyncio
+from fishaudio import AsyncFishAudio
+from fishaudio.utils import play, save
+
+async def main():
+    client = AsyncFishAudio()
+    audio = await client.tts.convert(text="Hello, world!")
+    play(audio)
+    save(audio, "output.mp3")
+
+asyncio.run(main())
 ```
 
-#### With Reference Voice
+## Core Features
 
-Use a reference voice ID to ensure consistent voice characteristics across generations:
+### Text-to-Speech
+
+**With custom voice:**
 
 ```python
-# Use an existing voice by ID
+# Use a specific voice by ID
 audio = client.tts.convert(
-    text="This will sound like the reference voice!",
-    reference_id="802e3bc2b27e49c2995d23ef70e6ac89" # Energetic Male
+    text="Custom voice",
+    reference_id="802e3bc2b27e49c2995d23ef70e6ac89"
 )
 ```
 
-#### Instant Voice Cloning
-
-Immediately clone a voice from a short audio sample:
+**With speed control:**
 
 ```python
-# Clone a voice from audio sample
-with open("reference.wav", "rb") as f:
-    audio = client.tts.convert(
-        text="This will sound like the reference voice!",
-        reference_audio=f.read(),
-        reference_text="Transcription of the reference audio"
-    )
+audio = client.tts.convert(
+    text="Speaking faster!",
+    speed=1.5  # 1.5x speed
+)
 ```
 
-#### Streaming Audio Chunks
+**Reusable configuration:**
+
+```python
+from fishaudio.types import TTSConfig, Prosody
 
-For processing audio chunks as they're generated:
+config = TTSConfig(
+    prosody=Prosody(speed=1.2, volume=-5),
+    reference_id="933563129e564b19a115bedd57b7406a",
+    format="wav",
+    latency="balanced"
+)
+
+# Reuse across generations
+audio1 = client.tts.convert(text="First message", config=config)
+audio2 = client.tts.convert(text="Second message", config=config)
+```
+
+**Chunk-by-chunk processing:**
 
 ```python
-# Stream and process audio chunks
-for chunk in client.tts.stream(text="Long text content..."):
-    # Process each chunk as it arrives
+# Stream and process chunks as they arrive
+for chunk in client.tts.stream(text="Long content..."):
     send_to_websocket(chunk)
 
 # Or collect all chunks
 audio = client.tts.stream(text="Hello!").collect()
 ```
 
-#### Real-time WebSocket Streaming
+[Learn more](https://docs.fish.audio/developer-guide/sdk-guide/python/text-to-speech)
 
-For low-latency bidirectional streaming where you send text chunks and receive audio in real-time:
+### Speech-to-Text
 
 ```python
-from fishaudio import FishAudio
-from fishaudio.utils import play
+# Transcribe audio
+with open("audio.wav", "rb") as f:
+    result = client.asr.transcribe(audio=f.read(), language="en")
 
-client = FishAudio()
+print(result.text)
+
+# Access timestamped segments
+for segment in result.segments:
+    print(f"[{segment.start:.2f}s - {segment.end:.2f}s] {segment.text}")
+```
+
+[Learn more](https://docs.fish.audio/developer-guide/sdk-guide/python/speech-to-text)
+
+### Real-time Streaming
+
+Stream dynamically generated text for conversational AI and live applications:
+
+**Synchronous:**
 
-# Stream text chunks and receive audio in real-time
+```python
 def text_chunks():
     yield "Hello, "
     yield "this is "
-    yield "streaming audio!"
+    yield "streaming!"
 
 audio_stream = client.tts.stream_websocket(text_chunks(), latency="balanced")
 play(audio_stream)
 ```
 
-### Speech Recognition (ASR)
-
-To transcribe audio to text:
+**Asynchronous:**
 
 ```python
-from fishaudio import FishAudio
-
-client = FishAudio()
+async def text_chunks():
+    yield "Hello, "
+    yield "this is "
+    yield "streaming!"
 
-# Transcribe audio to text
-with open("audio.wav", "rb") as f:
-    result = client.asr.transcribe(audio=f.read())
-    print(result.text)
+audio_stream = await client.tts.stream_websocket(text_chunks(), latency="balanced")
+play(audio_stream)
 ```
 
-### Voice Management
+[Learn more](https://docs.fish.audio/developer-guide/sdk-guide/python/websocket)
 
-Manage voice references and list available voices.
+### Voice Cloning
 
-```python
-from fishaudio import FishAudio
+**Instant cloning:**
 
-client = FishAudio()
-
-# List available voices
-voices = client.voices.list(language="en", tags="male")
-
-# Get a specific voice by ID
-voice = client.voices.get(voice_id="802e3bc2b27e49c2995d23ef70e6ac89")
+```python
+from fishaudio.types import ReferenceAudio
 
-# Create a custom voice
-with open("voice_sample.wav", "rb") as f:
-    new_voice = client.voices.create(
-        title="My Custom Voice",
-        voices=[f.read()],
-        description="My cloned voice"
+# Clone voice on-the-fly
+with open("reference.wav", "rb") as f:
+    audio = client.tts.convert(
+        text="Cloned voice speaking",
+        references=[ReferenceAudio(
+            audio=f.read(),
+            text="Text spoken in reference"
+        )]
     )
 ```
 
-### Async Usage
-
-You can also use the SDK in asynchronous applications:
+**Persistent voice models:**
 
 ```python
-import asyncio
-from fishaudio import AsyncFishAudio
-
-async def main():
-    client = AsyncFishAudio()
-
-    audio = await client.tts.convert(text="Async text-to-speech!")
-    # Process audio...
+# Create voice model for reuse
+with open("voice_sample.wav", "rb") as f:
+    voice = client.voices.create(
+        title="My Voice",
+        voices=[f.read()],
+        description="Custom voice clone"
+    )
 
-asyncio.run(main())
+# Use the created model
+audio = client.tts.convert(
+    text="Using my saved voice",
+    reference_id=voice.id
+)
 ```
 
-### Account
+[Learn more](https://docs.fish.audio/developer-guide/sdk-guide/python/voice-cloning)
 
-Check your remaining API credits, usage, and account details:
+## Resource Clients
 
-```python
-from fishaudio import FishAudio
+| Resource | Description | Key Methods |
+|----------|-------------|-------------|
+| `client.tts` | Text-to-speech | `convert()`, `stream()`, `stream_websocket()` |
+| `client.asr` | Speech recognition | `transcribe()` |
+| `client.voices` | Voice management | `list()`, `get()`, `create()`, `update()`, `delete()` |
+| `client.account` | Account info | `get_credits()`, `get_package()` |
 
-client = FishAudio()
-credits = client.account.get_credits()
-print(f"Remaining credits: {credits.credit}")
-```
-
-
-### Optional Dependencies
+## Error Handling
 
-For audio playback utilities to help with playing and saving audio files, install the `utils` extra:
+```python
+from fishaudio.exceptions import (
+    AuthenticationError,
+    RateLimitError,
+    ValidationError,
+    FishAudioError
+)
 
-```bash
-pip install fish-audio-sdk[utils]
+try:
+    audio = client.tts.convert(text="Hello!")
+except AuthenticationError:
+    print("Invalid API key")
+except RateLimitError:
+    print("Rate limit exceeded")
+except ValidationError as e:
+    print(f"Invalid request: {e}")
+except FishAudioError as e:
+    print(f"API error: {e}")
 ```
 
-## Legacy SDK
-
-The legacy `fish_audio_sdk` module continues to be supported for existing projects:
+## Resources
 
-```python
-from fish_audio_sdk import Session
-
-session = Session("your_api_key")
-```
+- **Documentation:** [SDK Guide](https://docs.fish.audio/developer-guide/sdk-guide/python/) | [API Reference](https://docs.fish.audio/api-reference/sdk/python/)
+- **Package:** [PyPI](https://pypi.org/project/fish-audio-sdk/) | [GitHub](https://github.com/fishaudio/fish-audio-python)
+- **Legacy SDK:** [Documentation](https://docs.fish.audio/archive/python-sdk-legacy) | [Migration Guide](https://docs.fish.audio/archive/python-sdk-legacy/migration-guide)
 
-For complete legacy SDK documentation, see the [Legacy API Documentation](https://docs.fish.audio/archive/python-sdk-legacy).
+## License
 
-We recommend migrating to the new `fishaudio` module - see our [Migration Guide](https://docs.fish.audio/archive/python-sdk-legacy/migration-guide) for assistance.
+This project is licensed under the Apache-2.0 License - see the [LICENSE](LICENSE) file for details.
\ No newline at end of file

From 38a7fbd886219e0405b4991cedad1d49b50d6fb8 Mon Sep 17 00:00:00 2001
From: James Ding <jamesding365@gmail.com>
Date: Thu, 13 Nov 2025 20:10:16 -0600
Subject: [PATCH 14/16] feat: update text-to-speech implementation to use
 streaming method

---
 src/fishaudio/core/iterators.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/fishaudio/core/iterators.py b/src/fishaudio/core/iterators.py
index 971618a..fbd5df8 100644
--- a/src/fishaudio/core/iterators.py
+++ b/src/fishaudio/core/iterators.py
@@ -16,10 +16,10 @@ class AudioStream:
         client = FishAudio(api_key="...")
 
         # Collect all audio at once
-        audio = client.tts.convert(text="Hello!").collect()
+        audio = client.tts.stream(text="Hello!").collect()
 
         # Or stream chunks manually
-        for chunk in client.tts.convert(text="Hello!"):
+        for chunk in client.tts.stream(text="Hello!"):
             process_chunk(chunk)
         ```
     """
@@ -47,7 +47,7 @@ def collect(self) -> bytes:
 
         Examples:
             ```python
-            audio = client.tts.convert(text="Hello!").collect()
+            audio = client.tts.stream(text="Hello!").collect()
             with open("output.mp3", "wb") as f:
                 f.write(audio)
             ```
@@ -71,10 +71,11 @@ class AsyncAudioStream:
         client = AsyncFishAudio(api_key="...")
 
         # Collect all audio at once
-        audio = await client.tts.convert(text="Hello!").collect()
+        stream = await client.tts.stream(text="Hello!")
+        audio = await stream.collect()
 
         # Or stream chunks manually
-        async for chunk in client.tts.convert(text="Hello!"):
+        async for chunk in await client.tts.stream(text="Hello!"):
             await process_chunk(chunk)
         ```
     """
@@ -102,7 +103,8 @@ async def collect(self) -> bytes:
 
         Examples:
             ```python
-            audio = await client.tts.convert(text="Hello!").collect()
+            stream = await client.tts.stream(text="Hello!")
+            audio = await stream.collect()
             with open("output.mp3", "wb") as f:
                 f.write(audio)
             ```

From 22b37339a4caf5a80ffaf8bf3902828873ea4e3d Mon Sep 17 00:00:00 2001
From: James Ding <jamesding365@gmail.com>
Date: Thu, 13 Nov 2025 20:25:43 -0600
Subject: [PATCH 15/16] fix: correct async streaming method calls in
 text-to-speech implementation

---
 src/fishaudio/resources/tts.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/fishaudio/resources/tts.py b/src/fishaudio/resources/tts.py
index 578b676..bd3ceec 100644
--- a/src/fishaudio/resources/tts.py
+++ b/src/fishaudio/resources/tts.py
@@ -382,11 +382,12 @@ async def stream(
             client = AsyncFishAudio(api_key="...")
 
             # Stream and process chunks
-            async for chunk in client.tts.stream(text="Hello world"):
+            async for chunk in await client.tts.stream(text="Hello world"):
                 await process_audio_chunk(chunk)
 
             # Or collect all at once
-            audio = await client.tts.stream(text="Hello world").collect()
+            stream = await client.tts.stream(text="Hello world")
+            audio = await stream.collect()
             ```
         """
         # Build request payload from config

From 30846d44fb054eb1749ba37a268825f9637772c2 Mon Sep 17 00:00:00 2001
From: James Ding <jamesding365@gmail.com>
Date: Thu, 13 Nov 2025 20:42:22 -0600
Subject: [PATCH 16/16] chore: update Python version to 3.9 and add future
 annotations for type hints

---
 .github/workflows/python.yml  | 2 +-
 tests/integration/conftest.py | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index 79de8c8..f6b6ca4 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -72,7 +72,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v5
         with:
-          python-version: "3.x"
+          python-version: "3.9"
 
       - name: Install uv
         uses: astral-sh/setup-uv@v4
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
index 7cc0ef1..2d43b32 100644
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@@ -1,5 +1,7 @@
 """Fixtures for integration tests."""
 
+from __future__ import annotations
+
 import os
 from pathlib import Path