diff --git a/.env.example b/.env.example index 9ab9793..8ccad9e 100644 --- a/.env.example +++ b/.env.example @@ -1 +1 @@ -FISH_AUDIO_API_KEY= \ No newline at end of file +FISH_API_KEY= \ No newline at end of file diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 619f999..f6b6ca4 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -72,7 +72,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: "3.x" + python-version: "3.9" - name: Install uv uses: astral-sh/setup-uv@v4 @@ -83,7 +83,7 @@ jobs: - name: Run integration tests run: uv run pytest tests/integration/ -v env: - FISH_AUDIO_API_KEY: ${{ secrets.FISH_AUDIO_API_KEY }} + FISH_API_KEY: ${{ secrets.FISH_API_KEY }} - name: Upload Test Artifacts uses: actions/upload-artifact@v4 diff --git a/README.md b/README.md index 0923942..1bb11a7 100644 --- a/README.md +++ b/README.md @@ -1,205 +1,250 @@ # Fish Audio Python SDK -To provide convenient Python program integration for https://docs.fish.audio. +[![PyPI version](https://img.shields.io/pypi/v/fish-audio-sdk.svg)](https://badge.fury.io/py/fish-audio-sdk) +[![Python Version](https://img.shields.io/badge/python-3.9+-blue)](https://pypi.org/project/fish-audio-sdk/) +[![PyPI - Downloads](https://img.shields.io/pypi/dm/fish-audio-sdk)](https://pypi.org/project/fish-audio-sdk/) +[![codecov](https://img.shields.io/codecov/c/github/fishaudio/fish-audio-python)](https://codecov.io/gh/fishaudio/fish-audio-python) +[![License](https://img.shields.io/github/license/fishaudio/fish-audio-python)](https://github.com/fishaudio/fish-audio-python/blob/main/LICENSE) -## Install +The official Python library for the Fish Audio API -```bash -pip install fish-audio-sdk -``` -> [!NOTE] -> The new release has not officially been released yet - please see legacy SDK documentation for now. 
+**Documentation:** [Python SDK Guide](https://docs.fish.audio/developer-guide/sdk-guide/python/) | [API Reference](https://docs.fish.audio/api-reference/sdk/python/) -## Usage +> **Note:** If you're using the legacy `fish_audio_sdk` API, see the [migration guide](https://docs.fish.audio/archive/python-sdk-legacy/migration-guide) to upgrade. -### New SDK (Recommended) +## Installation -The new SDK uses the `fishaudio` module: - -```python -from fishaudio import FishAudio +```bash +pip install fish-audio-sdk -client = FishAudio(api_key="your_api_key") +# With audio playback utilities +pip install fish-audio-sdk[utils] ``` -You can customize the base URL: +## Authentication -```python -from fishaudio import FishAudio +Get your API key from [fish.audio/app/api-keys](https://fish.audio/app/api-keys): -client = FishAudio(api_key="your_api_key", base_url="https://your-proxy-domain") +```bash +export FISH_API_KEY=your_api_key_here ``` -### Legacy SDK - -The legacy SDK uses the `fish_audio_sdk` module. Initialize a `Session` to use APIs. All APIs have synchronous and asynchronous versions. If you want to use the asynchronous version of the API, you only need to rewrite the original `session.api_call(...)` to `session.api_call.awaitable(...)`. +Or provide directly: ```python -from fish_audio_sdk import Session +from fishaudio import FishAudio -session = Session("your_api_key") +client = FishAudio(api_key="your_api_key") ``` -Sometimes, you may need to change our endpoint to another address. 
You can use +## Quick Start -```python -from fish_audio_sdk import Session - -session = Session("your_api_key", base_url="https://your-proxy-domain") -``` - -### Text to speech +**Synchronous:** ```python -from fish_audio_sdk import Session, TTSRequest +from fishaudio import FishAudio +from fishaudio.utils import play, save + +client = FishAudio() -session = Session("your_api_key") +# Generate audio +audio = client.tts.convert(text="Hello, world!") -with open("r.mp3", "wb") as f: - for chunk in session.tts(TTSRequest(text="Hello, world!")): - f.write(chunk) +# Play or save +play(audio) +save(audio, "output.mp3") ``` -Or use async version: +**Asynchronous:** ```python import asyncio -import aiofiles - -from fish_audio_sdk import Session, TTSRequest - -session = Session("your_api_key") - +from fishaudio import AsyncFishAudio +from fishaudio.utils import play, save async def main(): - async with aiofiles.open("r.mp3", "wb") as f: - async for chunk in session.tts.awaitable( - TTSRequest(text="Hello, world!"), - ): - await f.write(chunk) - + client = AsyncFishAudio() + audio = await client.tts.convert(text="Hello, world!") + play(audio) + save(audio, "output.mp3") asyncio.run(main()) ``` -#### Reference Audio +## Core Features -```python -from fish_audio_sdk import TTSRequest +### Text-to-Speech + +**With custom voice:** -TTSRequest( - text="Hello, world!", - reference_id="your_model_id", +```python +# Use a specific voice by ID +audio = client.tts.convert( + text="Custom voice", + reference_id="802e3bc2b27e49c2995d23ef70e6ac89" ) ``` -Or just use `ReferenceAudio` in `TTSRequest`: +**With speed control:** ```python -from fish_audio_sdk import TTSRequest, ReferenceAudio - -TTSRequest( - text="Hello, world!", - references=[ - ReferenceAudio( - audio=audio_file.read(), - text="reference audio text", - ) - ], +audio = client.tts.convert( + text="Speaking faster!", + speed=1.5 # 1.5x speed ) ``` -### List models +**Reusable configuration:** ```python -models = 
session.list_models() -print(models) +from fishaudio.types import TTSConfig, Prosody + +config = TTSConfig( + prosody=Prosody(speed=1.2, volume=-5), + reference_id="933563129e564b19a115bedd57b7406a", + format="wav", + latency="balanced" +) + +# Reuse across generations +audio1 = client.tts.convert(text="First message", config=config) +audio2 = client.tts.convert(text="Second message", config=config) ``` -Or use async version: +**Chunk-by-chunk processing:** ```python -import asyncio +# Stream and process chunks as they arrive +for chunk in client.tts.stream(text="Long content..."): + send_to_websocket(chunk) +# Or collect all chunks +audio = client.tts.stream(text="Hello!").collect() +``` -async def main(): - models = await session.list_models.awaitable() - print(models) +[Learn more](https://docs.fish.audio/developer-guide/sdk-guide/python/text-to-speech) +### Speech-to-Text -asyncio.run(main()) +```python +# Transcribe audio +with open("audio.wav", "rb") as f: + result = client.asr.transcribe(audio=f.read(), language="en") + +print(result.text) + +# Access timestamped segments +for segment in result.segments: + print(f"[{segment.start:.2f}s - {segment.end:.2f}s] {segment.text}") ``` +[Learn more](https://docs.fish.audio/developer-guide/sdk-guide/python/speech-to-text) + +### Real-time Streaming +Stream dynamically generated text for conversational AI and live applications: -### Get a model info by id +**Synchronous:** ```python -model = session.get_model("your_model_id") -print(model) +def text_chunks(): + yield "Hello, " + yield "this is " + yield "streaming!" + +audio_stream = client.tts.stream_websocket(text_chunks(), latency="balanced") +play(audio_stream) ``` -Or use async version: +**Asynchronous:** ```python -import asyncio +async def text_chunks(): + yield "Hello, " + yield "this is " + yield "streaming!" 
+audio_stream = await client.tts.stream_websocket(text_chunks(), latency="balanced") +play(audio_stream) +``` -async def main(): - model = await session.get_model.awaitable("your_model_id") - print(model) - +[Learn more](https://docs.fish.audio/developer-guide/sdk-guide/python/websocket) -asyncio.run(main()) -``` +### Voice Cloning -### Create a model +**Instant cloning:** ```python -model = session.create_model( - title="test", - description="test", - voices=[voice_file.read(), other_voice_file.read()], - cover_image=image_file.read(), -) -print(model) +from fishaudio.types import ReferenceAudio + +# Clone voice on-the-fly +with open("reference.wav", "rb") as f: + audio = client.tts.convert( + text="Cloned voice speaking", + references=[ReferenceAudio( + audio=f.read(), + text="Text spoken in reference" + )] + ) ``` -Or use async version: +**Persistent voice models:** ```python -import asyncio - - -async def main(): - model = await session.create_model.awaitable( - title="test", - description="test", - voices=[voice_file.read(), other_voice_file.read()], - cover_image=image_file.read(), +# Create voice model for reuse +with open("voice_sample.wav", "rb") as f: + voice = client.voices.create( + title="My Voice", + voices=[f.read()], + description="Custom voice clone" ) - print(model) - -asyncio.run(main()) +# Use the created model +audio = client.tts.convert( + text="Using my saved voice", + reference_id=voice.id +) ``` +[Learn more](https://docs.fish.audio/developer-guide/sdk-guide/python/voice-cloning) -### Delete a model +## Resource Clients -```python -session.delete_model("your_model_id") -``` +| Resource | Description | Key Methods | +|----------|-------------|-------------| +| `client.tts` | Text-to-speech | `convert()`, `stream()`, `stream_websocket()` | +| `client.asr` | Speech recognition | `transcribe()` | +| `client.voices` | Voice management | `list()`, `get()`, `create()`, `update()`, `delete()` | +| `client.account` | Account info | `get_credits()`, 
`get_package()` | -Or use async version: +## Error Handling ```python -import asyncio +from fishaudio.exceptions import ( + AuthenticationError, + RateLimitError, + ValidationError, + FishAudioError +) + +try: + audio = client.tts.convert(text="Hello!") +except AuthenticationError: + print("Invalid API key") +except RateLimitError: + print("Rate limit exceeded") +except ValidationError as e: + print(f"Invalid request: {e}") +except FishAudioError as e: + print(f"API error: {e}") +``` +## Resources -async def main(): - await session.delete_model.awaitable("your_model_id") +- **Documentation:** [SDK Guide](https://docs.fish.audio/developer-guide/sdk-guide/python/) | [API Reference](https://docs.fish.audio/api-reference/sdk/python/) +- **Package:** [PyPI](https://pypi.org/project/fish-audio-sdk/) | [GitHub](https://github.com/fishaudio/fish-audio-python) +- **Legacy SDK:** [Documentation](https://docs.fish.audio/archive/python-sdk-legacy) | [Migration Guide](https://docs.fish.audio/archive/python-sdk-legacy/migration-guide) +## License -asyncio.run(main()) -``` +This project is licensed under the Apache-2.0 License - see the [LICENSE](LICENSE) file for details. \ No newline at end of file diff --git a/examples/README.md b/examples/README.md index cc7510d..1132cd1 100644 --- a/examples/README.md +++ b/examples/README.md @@ -5,5 +5,5 @@ Example scripts demonstrating how to use the Fish Audio Python SDK. 
```bash # Install and setup pip install fishaudio -export FISH_AUDIO_API_KEY="your_api_key" +export FISH_API_KEY="your_api_key" ``` \ No newline at end of file diff --git a/examples/getting-started/01_simple_tts.py b/examples/getting-started/01_simple_tts.py index f312848..2574ce7 100644 --- a/examples/getting-started/01_simple_tts.py +++ b/examples/getting-started/01_simple_tts.py @@ -10,7 +10,7 @@ pip install fishaudio Environment Setup: - export FISH_AUDIO_API_KEY="your_api_key_here" + export FISH_API_KEY="your_api_key_here" # Or pass api_key directly to the client Expected Output: @@ -25,7 +25,7 @@ def main(): # Initialize the client with your API key - # Option 1: Use environment variable FISH_AUDIO_API_KEY + # Option 1: Use environment variable FISH_API_KEY # Option 2: Pass api_key directly: FishAudio(api_key="your_key") client = FishAudio() @@ -52,4 +52,4 @@ def main(): except Exception as e: print(f"Error: {e}") print("\nMake sure you have set your API key:") - print(" export FISH_AUDIO_API_KEY='your_api_key'") + print(" export FISH_API_KEY='your_api_key'") diff --git a/examples/getting-started/02_play_audio.py b/examples/getting-started/02_play_audio.py index 5b62750..34c8e29 100644 --- a/examples/getting-started/02_play_audio.py +++ b/examples/getting-started/02_play_audio.py @@ -19,7 +19,7 @@ # pip install sounddevice soundfile Environment Setup: - export FISH_AUDIO_API_KEY="your_api_key_here" + export FISH_API_KEY="your_api_key_here" Expected Output: - Plays the generated audio through your speakers @@ -98,7 +98,7 @@ def demo_playback_methods(): except Exception as e: print(f"Error: {e}") print("\nTroubleshooting:") - print("1. Make sure your API key is set: export FISH_AUDIO_API_KEY='your_key'") + print("1. Make sure your API key is set: export FISH_API_KEY='your_key'") print("2. 
Install ffmpeg for audio playback:") print(" - macOS: brew install ffmpeg") print(" - Ubuntu: sudo apt install ffmpeg") diff --git a/examples/getting-started/03_check_credits.py b/examples/getting-started/03_check_credits.py index 68fc721..87412ec 100644 --- a/examples/getting-started/03_check_credits.py +++ b/examples/getting-started/03_check_credits.py @@ -13,7 +13,7 @@ pip install fishaudio Environment Setup: - export FISH_AUDIO_API_KEY="your_api_key_here" + export FISH_API_KEY="your_api_key_here" Expected Output: - Displays account credit balance @@ -84,7 +84,7 @@ def check_api_setup(): print(f" Error: {e}") print("\nPlease check:") print(" 1. Your API key is correct") - print(" 2. Environment variable is set: export FISH_AUDIO_API_KEY='your_key'") + print(" 2. Environment variable is set: export FISH_API_KEY='your_key'") print(" 3. You have an active internet connection") return False @@ -100,6 +100,6 @@ def check_api_setup(): except Exception as e: print(f"\nError: {e}") print("\nMake sure you have set your API key:") - print(" export FISH_AUDIO_API_KEY='your_api_key'") + print(" export FISH_API_KEY='your_api_key'") print("\nOr pass it directly when creating the client:") print(" client = FishAudio(api_key='your_api_key')") diff --git a/pyproject.toml b/pyproject.toml index 8aa9dfb..49c8d9d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ readme = "README.md" license = {text = "Apache-2.0"} keywords = ["fish-audio", "tts", "text-to-speech", "voice-cloning", "ai", "speech-synthesis"] classifiers = [ - "Development Status :: 4 - Beta", + "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python :: 3", diff --git a/scripts/copy_docs.py b/scripts/copy_docs.py index a2587eb..ea82907 100644 --- a/scripts/copy_docs.py +++ b/scripts/copy_docs.py @@ -142,7 +142,7 @@ def copy_docs(sdk_root: Path, docs_root: Path) -> None: python_sdk_dir, 
lambda content: add_frontmatter( content, - title="Python SDK", + title="Overview", description="Fish Audio Python SDK for text-to-speech and voice cloning", icon="python", ), diff --git a/src/fishaudio/__init__.py b/src/fishaudio/__init__.py index bf33f15..dcedf83 100644 --- a/src/fishaudio/__init__.py +++ b/src/fishaudio/__init__.py @@ -28,6 +28,7 @@ from ._version import __version__ from .client import AsyncFishAudio, FishAudio +from .core.iterators import AsyncAudioStream, AudioStream from .exceptions import ( APIError, AuthenticationError, @@ -52,6 +53,9 @@ "play", "save", "stream", + # Audio streams + "AudioStream", + "AsyncAudioStream", # Types "FlushEvent", "TextEvent", diff --git a/src/fishaudio/client.py b/src/fishaudio/client.py index 5a914cf..53be1ec 100644 --- a/src/fishaudio/client.py +++ b/src/fishaudio/client.py @@ -51,7 +51,7 @@ def __init__( Initialize Fish Audio client. Args: - api_key: API key (can also use FISH_AUDIO_API_KEY env var) + api_key: API key (can also use FISH_API_KEY env var) base_url: API base URL timeout: Request timeout in seconds httpx_client: Optional custom HTTP client @@ -145,7 +145,7 @@ def __init__( Initialize async Fish Audio client. 
Args: - api_key: API key (can also use FISH_AUDIO_API_KEY env var) + api_key: API key (can also use FISH_API_KEY env var) base_url: API base URL timeout: Request timeout in seconds httpx_client: Optional custom async HTTP client diff --git a/src/fishaudio/core/client_wrapper.py b/src/fishaudio/core/client_wrapper.py index 2173f28..f1232f7 100644 --- a/src/fishaudio/core/client_wrapper.py +++ b/src/fishaudio/core/client_wrapper.py @@ -53,10 +53,10 @@ def __init__( api_key: Optional[str] = None, base_url: str = "https://api.fish.audio", ): - self.api_key = api_key or os.getenv("FISH_AUDIO_API_KEY") + self.api_key = api_key or os.getenv("FISH_API_KEY") if not self.api_key: raise ValueError( - "API key must be provided either as argument or via FISH_AUDIO_API_KEY environment variable" + "API key must be provided either as argument or via FISH_API_KEY environment variable" ) self.base_url = base_url diff --git a/src/fishaudio/core/iterators.py b/src/fishaudio/core/iterators.py new file mode 100644 index 0000000..fbd5df8 --- /dev/null +++ b/src/fishaudio/core/iterators.py @@ -0,0 +1,115 @@ +"""Audio stream wrappers with collection utilities.""" + +from typing import AsyncIterator, Iterator + + +class AudioStream: + """Wrapper for sync audio byte streams with collection utilities. + + This class wraps an iterator of audio bytes and provides a convenient + `.collect()` method to gather all chunks into a single bytes object. + + Examples: + ```python + from fishaudio import FishAudio + + client = FishAudio(api_key="...") + + # Collect all audio at once + audio = client.tts.stream(text="Hello!").collect() + + # Or stream chunks manually + for chunk in client.tts.stream(text="Hello!"): + process_chunk(chunk) + ``` + """ + + def __init__(self, iterator: Iterator[bytes]): + """Initialize the audio iterator wrapper. 
+ + Args: + iterator: The underlying iterator of audio bytes + """ + self._iter = iterator + + def __iter__(self) -> Iterator[bytes]: + """Allow direct iteration over audio chunks.""" + return self._iter + + def collect(self) -> bytes: + """Collect all audio chunks into a single bytes object. + + This consumes the iterator and returns all audio data as bytes. + After calling this method, the iterator cannot be used again. + + Returns: + Complete audio data as bytes + + Examples: + ```python + audio = client.tts.stream(text="Hello!").collect() + with open("output.mp3", "wb") as f: + f.write(audio) + ``` + """ + chunks = [] + for chunk in self._iter: + chunks.append(chunk) + return b"".join(chunks) + + +class AsyncAudioStream: + """Wrapper for async audio byte streams with collection utilities. + + This class wraps an async iterator of audio bytes and provides a convenient + `.collect()` method to gather all chunks into a single bytes object. + + Examples: + ```python + from fishaudio import AsyncFishAudio + + client = AsyncFishAudio(api_key="...") + + # Collect all audio at once + stream = await client.tts.stream(text="Hello!") + audio = await stream.collect() + + # Or stream chunks manually + async for chunk in await client.tts.stream(text="Hello!"): + await process_chunk(chunk) + ``` + """ + + def __init__(self, async_iterator: AsyncIterator[bytes]): + """Initialize the async audio iterator wrapper. + + Args: + async_iterator: The underlying async iterator of audio bytes + """ + self._iter = async_iterator + + def __aiter__(self) -> AsyncIterator[bytes]: + """Allow direct async iteration over audio chunks.""" + return self._iter + + async def collect(self) -> bytes: + """Collect all audio chunks into a single bytes object. + + This consumes the async iterator and returns all audio data as bytes. + After calling this method, the iterator cannot be used again. 
+ + Returns: + Complete audio data as bytes + + Examples: + ```python + stream = await client.tts.stream(text="Hello!") + audio = await stream.collect() + with open("output.mp3", "wb") as f: + f.write(audio) + ``` + """ + chunks = [] + async for chunk in self._iter: + chunks.append(chunk) + return b"".join(chunks) diff --git a/src/fishaudio/resources/tts.py b/src/fishaudio/resources/tts.py index fef1cd4..bd3ceec 100644 --- a/src/fishaudio/resources/tts.py +++ b/src/fishaudio/resources/tts.py @@ -9,6 +9,7 @@ from .realtime import aiter_websocket_audio, iter_websocket_audio from ..core import AsyncClientWrapper, ClientWrapper, RequestOptions +from ..core.iterators import AsyncAudioStream, AudioStream from ..types import ( AudioFormat, CloseEvent, @@ -58,7 +59,7 @@ class TTSClient: def __init__(self, client_wrapper: ClientWrapper): self._client = client_wrapper - def convert( + def stream( self, *, text: str, @@ -70,9 +71,9 @@ def convert( config: TTSConfig = TTSConfig(), model: Model = "s1", request_options: Optional[RequestOptions] = None, - ) -> Iterator[bytes]: + ) -> AudioStream: """ - Convert text to speech. + Stream text-to-speech audio chunks. 
Args: text: Text to synthesize @@ -86,48 +87,20 @@ def convert( request_options: Request-level overrides Returns: - Iterator of audio bytes + AudioStream object that can be iterated for audio chunks Example: ```python - from fishaudio import FishAudio, TTSConfig, ReferenceAudio + from fishaudio import FishAudio client = FishAudio(api_key="...") - # Simple usage with defaults - audio = client.tts.convert(text="Hello world") - - # With format parameter - audio = client.tts.convert(text="Hello world", format="wav") - - # With speed parameter - audio = client.tts.convert(text="Hello world", speed=1.5) - - # With reference_id parameter - audio = client.tts.convert(text="Hello world", reference_id="your_model_id") - - # With references parameter - audio = client.tts.convert( - text="Hello world", - references=[ReferenceAudio(audio=audio_bytes, text="sample")] - ) - - # Combine multiple parameters - audio = client.tts.convert( - text="Hello world", - format="wav", - speed=1.2, - latency="normal" - ) - - # Parameters override config values - config = TTSConfig(format="mp3", prosody=Prosody(speed=1.0)) - audio = client.tts.convert(text="Hello world", format="wav", config=config) - # Result: format="wav" (parameter wins) + # Stream and process chunks + for chunk in client.tts.stream(text="Hello world"): + process_audio_chunk(chunk) - with open("output.mp3", "wb") as f: - for chunk in audio: - f.write(chunk) + # Or collect all at once + audio = client.tts.stream(text="Hello world").collect() ``` """ # Build request payload from config @@ -160,10 +133,75 @@ def convert( request_options=request_options, ) - # Stream response chunks - for chunk in response.iter_bytes(): - if chunk: - yield chunk + # Create generator and wrap with AudioStream + def _stream(): + for chunk in response.iter_bytes(): + if chunk: + yield chunk + + return AudioStream(_stream()) + + def convert( + self, + *, + text: str, + reference_id: Optional[str] = None, + references: Optional[List[ReferenceAudio]] 
= None, + format: Optional[AudioFormat] = None, + latency: Optional[LatencyMode] = None, + speed: Optional[float] = None, + config: TTSConfig = TTSConfig(), + model: Model = "s1", + request_options: Optional[RequestOptions] = None, + ) -> bytes: + """ + Convert text to speech and return complete audio as bytes. + + This is a convenience method that streams all audio chunks and combines them. + For chunk-by-chunk processing, use stream() instead. + + Args: + text: Text to synthesize + reference_id: Voice reference ID (overrides config.reference_id if provided) + references: Reference audio samples (overrides config.references if provided) + format: Audio format - "mp3", "wav", "pcm", or "opus" (overrides config.format if provided) + latency: Latency mode - "normal" or "balanced" (overrides config.latency if provided) + speed: Speech speed multiplier, e.g. 1.5 for 1.5x speed (overrides config.prosody.speed if provided) + config: TTS configuration (audio settings, voice, model parameters) + model: TTS model to use + request_options: Request-level overrides + + Returns: + Complete audio as bytes + + Example: + ```python + from fishaudio import FishAudio + from fishaudio.utils import play, save + + client = FishAudio(api_key="...") + + # Get complete audio + audio = client.tts.convert(text="Hello world") + + # Play it + play(audio) + + # Or save it + save(audio, "output.mp3") + ``` + """ + return self.stream( + text=text, + reference_id=reference_id, + references=references, + format=format, + latency=latency, + speed=speed, + config=config, + model=model, + request_options=request_options, + ).collect() def stream_websocket( self, @@ -307,7 +345,7 @@ class AsyncTTSClient: def __init__(self, client_wrapper: AsyncClientWrapper): self._client = client_wrapper - async def convert( + async def stream( self, *, text: str, @@ -319,9 +357,9 @@ async def convert( config: TTSConfig = TTSConfig(), model: Model = "s1", request_options: Optional[RequestOptions] = None, - ): + ) -> 
AsyncAudioStream: """ - Convert text to speech (async). + Stream text-to-speech audio chunks (async). Args: text: Text to synthesize @@ -335,48 +373,21 @@ async def convert( request_options: Request-level overrides Returns: - Async iterator of audio bytes + AsyncAudioStream object that can be iterated for audio chunks Example: ```python - from fishaudio import AsyncFishAudio, TTSConfig, ReferenceAudio + from fishaudio import AsyncFishAudio client = AsyncFishAudio(api_key="...") - # Simple usage with defaults - audio = await client.tts.convert(text="Hello world") - - # With format parameter - audio = await client.tts.convert(text="Hello world", format="wav") - - # With speed parameter - audio = await client.tts.convert(text="Hello world", speed=1.5) - - # With reference_id parameter - audio = await client.tts.convert(text="Hello world", reference_id="your_model_id") - - # With references parameter - audio = await client.tts.convert( - text="Hello world", - references=[ReferenceAudio(audio=audio_bytes, text="sample")] - ) - - # Combine multiple parameters - audio = await client.tts.convert( - text="Hello world", - format="wav", - speed=1.2, - latency="normal" - ) - - # Parameters override config values - config = TTSConfig(format="mp3", prosody=Prosody(speed=1.0)) - audio = await client.tts.convert(text="Hello world", format="wav", config=config) - # Result: format="wav" (parameter wins) + # Stream and process chunks + async for chunk in await client.tts.stream(text="Hello world"): + await process_audio_chunk(chunk) - async with aiofiles.open("output.mp3", "wb") as f: - async for chunk in audio: - await f.write(chunk) + # Or collect all at once + stream = await client.tts.stream(text="Hello world") + audio = await stream.collect() ``` """ # Build request payload from config @@ -409,10 +420,76 @@ async def convert( request_options=request_options, ) - # Stream response chunks - async for chunk in response.aiter_bytes(): - if chunk: - yield chunk + # Create async 
generator and wrap with AsyncAudioStream + async def _stream(): + async for chunk in response.aiter_bytes(): + if chunk: + yield chunk + + return AsyncAudioStream(_stream()) + + async def convert( + self, + *, + text: str, + reference_id: Optional[str] = None, + references: Optional[List[ReferenceAudio]] = None, + format: Optional[AudioFormat] = None, + latency: Optional[LatencyMode] = None, + speed: Optional[float] = None, + config: TTSConfig = TTSConfig(), + model: Model = "s1", + request_options: Optional[RequestOptions] = None, + ) -> bytes: + """ + Convert text to speech and return complete audio as bytes (async). + + This is a convenience method that streams all audio chunks and combines them. + For chunk-by-chunk processing, use stream() instead. + + Args: + text: Text to synthesize + reference_id: Voice reference ID (overrides config.reference_id if provided) + references: Reference audio samples (overrides config.references if provided) + format: Audio format - "mp3", "wav", "pcm", or "opus" (overrides config.format if provided) + latency: Latency mode - "normal" or "balanced" (overrides config.latency if provided) + speed: Speech speed multiplier, e.g. 
1.5 for 1.5x speed (overrides config.prosody.speed if provided) + config: TTS configuration (audio settings, voice, model parameters) + model: TTS model to use + request_options: Request-level overrides + + Returns: + Complete audio as bytes + + Example: + ```python + from fishaudio import AsyncFishAudio + from fishaudio.utils import play, save + + client = AsyncFishAudio(api_key="...") + + # Get complete audio + audio = await client.tts.convert(text="Hello world") + + # Play it + play(audio) + + # Or save it + save(audio, "output.mp3") + ``` + """ + stream = await self.stream( + text=text, + reference_id=reference_id, + references=references, + format=format, + latency=latency, + speed=speed, + config=config, + model=model, + request_options=request_options, + ) + return await stream.collect() async def stream_websocket( self, diff --git a/src/fishaudio/types/account.py b/src/fishaudio/types/account.py index 2803383..966ade5 100644 --- a/src/fishaudio/types/account.py +++ b/src/fishaudio/types/account.py @@ -7,7 +7,17 @@ class Credits(BaseModel): - """User's API credit balance.""" + """User's API credit balance. + + Attributes: + id: Unique credits record identifier + user_id: User identifier + credit: Current credit balance (decimal for precise accounting) + created_at: Timestamp when the credits record was created + updated_at: Timestamp when the credits were last updated + has_phone_sha256: Whether the user has a verified phone number. Optional + has_free_credit: Whether the user has received free credits. Optional + """ model_config = ConfigDict(populate_by_name=True) @@ -21,7 +31,18 @@ class Credits(BaseModel): class Package(BaseModel): - """User's prepaid package information.""" + """User's prepaid package information. 
+ + Attributes: + id: Unique package identifier + user_id: User identifier + type: Package type identifier + total: Total units in the package + balance: Remaining units in the package + created_at: Timestamp when the package was purchased + updated_at: Timestamp when the package was last updated + finished_at: Timestamp when the package was fully consumed. None if still active + """ model_config = ConfigDict(populate_by_name=True) diff --git a/src/fishaudio/types/asr.py b/src/fishaudio/types/asr.py index 84d2dbb..db73916 100644 --- a/src/fishaudio/types/asr.py +++ b/src/fishaudio/types/asr.py @@ -6,7 +6,13 @@ class ASRSegment(BaseModel): - """A timestamped segment of transcribed text.""" + """A timestamped segment of transcribed text. + + Attributes: + text: The transcribed text for this segment + start: Segment start time in seconds + end: Segment end time in seconds + """ text: str start: float @@ -14,7 +20,13 @@ class ASRSegment(BaseModel): class ASRResponse(BaseModel): - """Response from speech-to-text transcription.""" + """Response from speech-to-text transcription. + + Attributes: + text: Complete transcription of the entire audio + duration: Total audio duration in milliseconds + segments: List of timestamped text segments. Empty if include_timestamps=False + """ text: str duration: float # Duration in milliseconds diff --git a/src/fishaudio/types/shared.py b/src/fishaudio/types/shared.py index df7ab4a..1e756d9 100644 --- a/src/fishaudio/types/shared.py +++ b/src/fishaudio/types/shared.py @@ -9,7 +9,12 @@ class PaginatedResponse(BaseModel, Generic[T]): - """Generic paginated response.""" + """Generic paginated response. 
+ + Attributes: + total: Total number of items across all pages + items: List of items on the current page + """ total: int items: List[T] @@ -25,7 +30,7 @@ class PaginatedResponse(BaseModel, Generic[T]): Visibility = Literal["public", "unlist", "private"] # Training mode types -TrainMode = Literal["fast", "full"] +TrainMode = Literal["fast"] # Model state types ModelState = Literal["created", "training", "trained", "failed"] diff --git a/src/fishaudio/types/tts.py b/src/fishaudio/types/tts.py index 4dd7671..8b0923a 100644 --- a/src/fishaudio/types/tts.py +++ b/src/fishaudio/types/tts.py @@ -8,14 +8,27 @@ class ReferenceAudio(BaseModel): - """Reference audio for voice cloning/style.""" + """Reference audio for voice cloning/style. + + Attributes: + audio: Audio file bytes for the reference sample + text: Transcription of what is spoken in the reference audio. Should match exactly + what's spoken and include punctuation for proper prosody. + """ audio: bytes text: str class Prosody(BaseModel): - """Speech prosody settings (speed and volume).""" + """Speech prosody settings (speed and volume). + + Attributes: + speed: Speech speed multiplier. Range: 0.5-2.0. Default: 1.0. + Examples: 1.5 = 50% faster, 0.8 = 20% slower + volume: Volume adjustment in decibels. Range: -20.0 to 20.0. Default: 0.0 (no change). + Positive values increase volume, negative values decrease it. + """ speed: Annotated[float, Field(ge=0.5, le=2.0)] = 1.0 volume: Annotated[float, Field(ge=-20.0, le=20.0)] = 0.0 @@ -45,6 +58,23 @@ class TTSConfig(BaseModel): Reusable configuration for text-to-speech requests. Create once, use multiple times. All parameters have sensible defaults. + + Attributes: + format: Audio output format. Options: "mp3", "wav", "pcm", "opus". Default: "mp3" + sample_rate: Audio sample rate in Hz. If None, uses format-specific default. + mp3_bitrate: MP3 bitrate in kbps. Options: 64, 128, 192. Default: 128 + opus_bitrate: Opus bitrate in kbps. Options: -1000, 24, 32, 48, 64. 
Default: 32 + normalize: Whether to normalize/clean the input text. Default: True + chunk_length: Characters per generation chunk. Range: 100-300. Default: 200. + Lower values = faster initial response, higher values = better quality + latency: Generation mode. Options: "normal" (higher quality), "balanced" (faster). Default: "balanced" + reference_id: Voice model ID from fish.audio (e.g., "802e3bc2b27e49c2995d23ef70e6ac89"). + Find IDs in voice URLs or via voices.list() + references: List of reference audio samples for instant voice cloning. Default: [] + prosody: Speech speed and volume settings. Default: None (uses natural prosody) + top_p: Nucleus sampling parameter for token selection. Range: 0.0-1.0. Default: 0.7 + temperature: Randomness in generation. Range: 0.0-1.0. Default: 0.7. + Higher = more varied, lower = more consistent """ # Audio output settings @@ -74,6 +104,21 @@ class TTSRequest(BaseModel): This model is used internally for WebSocket streaming. For the HTTP API, parameters are passed directly to methods. + + Attributes: + text: Text to synthesize into speech + chunk_length: Characters per generation chunk. Range: 100-300. Default: 200 + format: Audio output format. Options: "mp3", "wav", "pcm", "opus". Default: "mp3" + sample_rate: Audio sample rate in Hz. If None, uses format-specific default + mp3_bitrate: MP3 bitrate in kbps. Options: 64, 128, 192. Default: 128 + opus_bitrate: Opus bitrate in kbps. Options: -1000, 24, 32, 48, 64. Default: 32 + references: List of reference audio samples for voice cloning. Default: [] + reference_id: Voice model ID for using a specific voice. Default: None + normalize: Whether to normalize/clean the input text. Default: True + latency: Generation mode. Options: "normal", "balanced". Default: "balanced" + prosody: Speech speed and volume settings. Default: None + top_p: Nucleus sampling for token selection. Range: 0.0-1.0. Default: 0.7 + temperature: Randomness in generation. Range: 0.0-1.0. 
Default: 0.7 """ text: str @@ -93,26 +138,46 @@ class TTSRequest(BaseModel): # WebSocket event types for streaming TTS class StartEvent(BaseModel): - """WebSocket start event.""" + """WebSocket start event to initiate TTS streaming. + + Attributes: + event: Event type identifier, always "start" + request: TTS configuration for the streaming session + """ event: Literal["start"] = "start" request: TTSRequest class TextEvent(BaseModel): - """WebSocket text chunk event.""" + """WebSocket event to send a text chunk for synthesis. + + Attributes: + event: Event type identifier, always "text" + text: Text chunk to synthesize + """ event: Literal["text"] = "text" text: str class FlushEvent(BaseModel): - """WebSocket flush event - forces buffer to generate audio immediately.""" + """WebSocket event to force immediate audio generation from buffered text. + + Use this to ensure all buffered text is synthesized without waiting for more input. + + Attributes: + event: Event type identifier, always "flush" + """ event: Literal["flush"] = "flush" class CloseEvent(BaseModel): - """WebSocket close event.""" + """WebSocket event to end the streaming session. + + Attributes: + event: Event type identifier, always "stop" + """ event: Literal["stop"] = "stop" diff --git a/src/fishaudio/types/voices.py b/src/fishaudio/types/voices.py index 90e41b2..04f7570 100644 --- a/src/fishaudio/types/voices.py +++ b/src/fishaudio/types/voices.py @@ -9,7 +9,14 @@ class Sample(BaseModel): - """A sample audio for a voice model.""" + """A sample audio for a voice model. + + Attributes: + title: Title/name of the audio sample + text: Transcription of the spoken content in the sample + task_id: Unique identifier for the sample task + audio: URL or path to the audio file + """ title: str text: str @@ -18,7 +25,13 @@ class Sample(BaseModel): class Author(BaseModel): - """Voice model author information.""" + """Voice model author information. 
+ + Attributes: + id: Unique author identifier + nickname: Author's display name + avatar: URL to author's avatar image + """ id: str = Field(alias="_id") nickname: str @@ -27,9 +40,32 @@ class Voice(BaseModel): """ - A voice model + A voice model. Represents a TTS voice that can be used for synthesis. + + Attributes: + id: Unique voice model identifier (use as reference_id in TTS) + type: Model type. Options: "svc" (singing voice conversion), "tts" (text-to-speech) + title: Voice model title/name + description: Detailed description of the voice model + cover_image: URL to the voice model's cover image + train_mode: Training mode used. Options: "fast" + state: Current model state. Options: "created", "training", "trained", "failed" + tags: List of tags for categorization (e.g., ["male", "english", "young"]) + samples: List of audio samples demonstrating the voice + created_at: Timestamp when the model was created + updated_at: Timestamp when the model was last updated + languages: List of supported language codes (e.g., ["en", "zh"]) + visibility: Model visibility. Options: "public", "unlist", "private" + lock_visibility: Whether visibility setting is locked + like_count: Number of likes the model has received + mark_count: Number of bookmarks/favorites + shared_count: Number of times the model has been shared + task_count: Number of times the model has been used for generation + liked: Whether the current user has liked this model. Default: False + marked: Whether the current user has bookmarked this model. 
Default: False + author: Information about the voice model's creator """ id: str = Field(alias="_id") diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 00ec2d8..2d43b32 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,5 +1,7 @@ """Fixtures for integration tests.""" +from __future__ import annotations + import os from pathlib import Path @@ -21,9 +23,9 @@ @pytest.fixture def api_key(): """Get API key from environment.""" - key = os.getenv("FISH_AUDIO_API_KEY") + key = os.getenv("FISH_API_KEY") if not key: - pytest.skip("No API key available (set FISH_AUDIO_API_KEY)") + pytest.skip("No API key available (set FISH_API_KEY)") return key @@ -55,17 +57,20 @@ def save_audio(): A callable that takes audio chunks and filename and saves to output/ """ - def _save(audio_chunks: list[bytes], filename: str) -> Path: - """Save audio chunks to output directory. + def _save(audio: bytes | list[bytes], filename: str) -> Path: + """Save audio to output directory. 
Args: - audio_chunks: List of audio byte chunks + audio: Audio bytes or list of audio byte chunks filename: Name of the output file (including extension) Returns: Path to the saved file """ - complete_audio = b"".join(audio_chunks) + if isinstance(audio, bytes): + complete_audio = audio + else: + complete_audio = b"".join(audio) output_file = OUTPUT_DIR / filename output_file.write_bytes(complete_audio) return output_file diff --git a/tests/integration/test_asr_integration.py b/tests/integration/test_asr_integration.py index 953f7c8..7e2602e 100644 --- a/tests/integration/test_asr_integration.py +++ b/tests/integration/test_asr_integration.py @@ -13,10 +13,7 @@ def sample_audio(self, client): """Generate sample audio for ASR testing.""" # Generate audio from known text config = TTSConfig(format="wav") - audio_chunks = list( - client.tts.convert(text="Hello world, this is a test.", config=config) - ) - return b"".join(audio_chunks) + return client.tts.convert(text="Hello world, this is a test.", config=config) def test_basic_asr(self, client, sample_audio): """Test basic speech-to-text transcription.""" @@ -54,13 +51,8 @@ class TestAsyncASRIntegration: @pytest.fixture async def async_sample_audio(self, async_client): """Generate sample audio for async ASR testing.""" - audio_chunks = [] config = TTSConfig(format="wav") - async for chunk in async_client.tts.convert( - text="Async test audio", config=config - ): - audio_chunks.append(chunk) - return b"".join(audio_chunks) + return await async_client.tts.convert(text="Async test audio", config=config) @pytest.mark.asyncio async def test_async_basic_asr(self, async_client, async_sample_audio): diff --git a/tests/integration/test_tts_integration.py b/tests/integration/test_tts_integration.py index 8d00d77..f6b4fc2 100644 --- a/tests/integration/test_tts_integration.py +++ b/tests/integration/test_tts_integration.py @@ -13,15 +13,13 @@ class TestTTSIntegration: def test_basic_tts(self, client, save_audio): """Test basic 
text-to-speech generation.""" - audio_chunks = list(client.tts.convert(text="Hello, this is a test.")) + audio = client.tts.convert(text="Hello, this is a test.") - assert len(audio_chunks) > 0 - # Verify we got audio data (check for common audio headers) - complete_audio = b"".join(audio_chunks) - assert len(complete_audio) > 1000 # Should have substantial audio data + assert len(audio) > 1000 # Should have substantial audio data + assert isinstance(audio, bytes) # Write to output directory - save_audio(audio_chunks, "test_basic_tts.mp3") + save_audio(audio, "test_basic_tts.mp3") def test_tts_with_different_formats(self, client, save_audio): """Test TTS with different audio formats.""" @@ -29,27 +27,23 @@ def test_tts_with_different_formats(self, client, save_audio): for fmt in formats: config = TTSConfig(format=fmt, chunk_length=100) - audio_chunks = list( - client.tts.convert(text=f"Testing format {fmt}", config=config) - ) - assert len(audio_chunks) > 0, f"Failed for format: {fmt}" + audio = client.tts.convert(text=f"Testing format {fmt}", config=config) + assert len(audio) > 0, f"Failed for format: {fmt}" # Write to output directory - save_audio(audio_chunks, f"test_format_{fmt}.{fmt}") + save_audio(audio, f"test_format_{fmt}.{fmt}") def test_tts_with_prosody(self, client, save_audio): """Test TTS with prosody settings.""" prosody = Prosody(speed=1.2, volume=0.5) config = TTSConfig(prosody=prosody) - audio_chunks = list( - client.tts.convert(text="Testing prosody settings", config=config) - ) + audio = client.tts.convert(text="Testing prosody settings", config=config) - assert len(audio_chunks) > 0 + assert len(audio) > 0 # Write to output directory - save_audio(audio_chunks, "test_prosody.mp3") + save_audio(audio, "test_prosody.mp3") def test_tts_with_different_models(self, client, save_audio): """Test TTS with different models.""" @@ -57,13 +51,11 @@ def test_tts_with_different_models(self, client, save_audio): for model in models: try: - audio_chunks = 
list( - client.tts.convert(text=f"Testing model {model}", model=model) - ) - assert len(audio_chunks) > 0, f"Failed for model: {model}" + audio = client.tts.convert(text=f"Testing model {model}", model=model) + assert len(audio) > 0, f"Failed for model: {model}" # Write to output directory - save_audio(audio_chunks, f"test_model_{model}.mp3") + save_audio(audio, f"test_model_{model}.mp3") except Exception as e: # Some models might not be available pytest.skip(f"Model {model} not available: {e}") @@ -73,23 +65,21 @@ def test_tts_longer_text(self, client, save_audio): long_text = "This is a longer piece of text for testing. " * 10 config = TTSConfig(chunk_length=200) - audio_chunks = list(client.tts.convert(text=long_text, config=config)) + audio = client.tts.convert(text=long_text, config=config) - assert len(audio_chunks) > 0 - complete_audio = b"".join(audio_chunks) # Longer text should produce more audio - assert len(complete_audio) > 5000 + assert len(audio) > 5000 # Write to output directory - save_audio(audio_chunks, "test_longer_text.mp3") + save_audio(audio, "test_longer_text.mp3") def test_tts_empty_text_should_fail(self, client): """Test that empty text is handled.""" # This might succeed with silence or fail - test behavior try: - audio_chunks = list(client.tts.convert(text="")) + audio = client.tts.convert(text="") # If it succeeds, verify we get something - assert len(audio_chunks) >= 0 + assert isinstance(audio, bytes) except Exception: # If it fails, that's also acceptable pass @@ -101,16 +91,13 @@ class TestAsyncTTSIntegration: @pytest.mark.asyncio async def test_basic_async_tts(self, async_client, save_audio): """Test basic async text-to-speech generation.""" - audio_chunks = [] - async for chunk in async_client.tts.convert(text="Hello from async"): - audio_chunks.append(chunk) + audio = await async_client.tts.convert(text="Hello from async") - assert len(audio_chunks) > 0 - complete_audio = b"".join(audio_chunks) - assert len(complete_audio) > 1000 
+ assert len(audio) > 1000 + assert isinstance(audio, bytes) # Write to output directory - save_audio(audio_chunks, "test_async_basic.mp3") + save_audio(audio, "test_async_basic.mp3") @pytest.mark.asyncio async def test_async_tts_with_prosody(self, async_client, save_audio): @@ -118,13 +105,9 @@ async def test_async_tts_with_prosody(self, async_client, save_audio): prosody = Prosody(speed=0.8, volume=-0.2) config = TTSConfig(prosody=prosody) - audio_chunks = [] - async for chunk in async_client.tts.convert( - text="Async prosody test", config=config - ): - audio_chunks.append(chunk) + audio = await async_client.tts.convert(text="Async prosody test", config=config) - assert len(audio_chunks) > 0 + assert len(audio) > 0 # Write to output directory - save_audio(audio_chunks, "test_async_prosody.mp3") + save_audio(audio, "test_async_prosody.mp3") diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index f1aa2c0..d288491 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -20,7 +20,7 @@ def test_init_with_api_key(self, mock_api_key): assert client._client_wrapper.api_key == mock_api_key def test_init_with_env_var(self, mock_api_key): - with patch.dict("os.environ", {"FISH_AUDIO_API_KEY": mock_api_key}): + with patch.dict("os.environ", {"FISH_API_KEY": mock_api_key}): client = FishAudio() assert client._client_wrapper.api_key == mock_api_key diff --git a/tests/unit/test_core.py b/tests/unit/test_core.py index 76a3611..f77dc04 100644 --- a/tests/unit/test_core.py +++ b/tests/unit/test_core.py @@ -67,7 +67,7 @@ def test_init_without_api_key_raises(self): ClientWrapper() def test_init_with_env_var(self, mock_api_key): - with patch.dict("os.environ", {"FISH_AUDIO_API_KEY": mock_api_key}): + with patch.dict("os.environ", {"FISH_API_KEY": mock_api_key}): wrapper = ClientWrapper() assert wrapper.api_key == mock_api_key diff --git a/tests/unit/test_tts.py b/tests/unit/test_tts.py index 6ddff60..47bfb06 100644 --- a/tests/unit/test_tts.py +++ 
b/tests/unit/test_tts.py @@ -40,15 +40,15 @@ def async_tts_client(async_mock_client_wrapper): class TestTTSClient: """Test synchronous TTSClient.""" - def test_convert_basic(self, tts_client, mock_client_wrapper): - """Test basic TTS conversion.""" + def test_stream_basic(self, tts_client, mock_client_wrapper): + """Test basic TTS streaming.""" # Setup mock response with audio chunks mock_response = Mock() mock_response.iter_bytes.return_value = iter([b"chunk1", b"chunk2", b"chunk3"]) mock_client_wrapper.request.return_value = mock_response - # Call convert - audio_chunks = list(tts_client.convert(text="Hello world")) + # Call stream + audio_chunks = list(tts_client.stream(text="Hello world")) # Verify we got chunks back assert audio_chunks == [b"chunk1", b"chunk2", b"chunk3"] @@ -67,6 +67,23 @@ def test_convert_basic(self, tts_client, mock_client_wrapper): # Check payload was msgpack encoded assert "content" in call_args[1] + def test_convert_basic(self, tts_client, mock_client_wrapper): + """Test basic TTS conversion returns bytes.""" + # Setup mock response with audio chunks + mock_response = Mock() + mock_response.iter_bytes.return_value = iter([b"chunk1", b"chunk2", b"chunk3"]) + mock_client_wrapper.request.return_value = mock_response + + # Call convert + audio = tts_client.convert(text="Hello world") + + # Verify we got complete audio as bytes + assert audio == b"chunk1chunk2chunk3" + assert isinstance(audio, bytes) + + # Verify request was made correctly + mock_client_wrapper.request.assert_called_once() + def test_convert_with_reference_id(self, tts_client, mock_client_wrapper): """Test TTS with reference voice ID.""" mock_response = Mock() @@ -74,7 +91,7 @@ def test_convert_with_reference_id(self, tts_client, mock_client_wrapper): mock_client_wrapper.request.return_value = mock_response config = TTSConfig(reference_id="voice_123") - list(tts_client.convert(text="Hello", config=config)) + tts_client.convert(text="Hello", config=config) # Verify 
reference_id in payload call_args = mock_client_wrapper.request.call_args @@ -87,7 +104,7 @@ def test_convert_with_reference_id_parameter(self, tts_client, mock_client_wrapp mock_response.iter_bytes.return_value = iter([b"audio"]) mock_client_wrapper.request.return_value = mock_response - list(tts_client.convert(text="Hello", reference_id="voice_456")) + tts_client.convert(text="Hello", reference_id="voice_456") # Verify reference_id in payload call_args = mock_client_wrapper.request.call_args @@ -103,11 +120,7 @@ def test_convert_parameter_reference_id_overrides_config( mock_client_wrapper.request.return_value = mock_response config = TTSConfig(reference_id="voice_from_config") - list( - tts_client.convert( - text="Hello", reference_id="voice_from_param", config=config - ) - ) + tts_client.convert(text="Hello", reference_id="voice_from_param", config=config) # Verify parameter reference_id takes precedence call_args = mock_client_wrapper.request.call_args @@ -126,7 +139,7 @@ def test_convert_with_references(self, tts_client, mock_client_wrapper): ] config = TTSConfig(references=references) - list(tts_client.convert(text="Hello", config=config)) + tts_client.convert(text="Hello", config=config) # Verify references in payload call_args = mock_client_wrapper.request.call_args @@ -146,7 +159,7 @@ def test_convert_with_references_parameter(self, tts_client, mock_client_wrapper ReferenceAudio(audio=b"ref_audio_2", text="Sample 2"), ] - list(tts_client.convert(text="Hello", references=references)) + tts_client.convert(text="Hello", references=references) # Verify references in payload call_args = mock_client_wrapper.request.call_args @@ -167,7 +180,7 @@ def test_convert_parameter_references_overrides_config( param_refs = [ReferenceAudio(audio=b"param_audio", text="Param")] config = TTSConfig(references=config_refs) - list(tts_client.convert(text="Hello", references=param_refs, config=config)) + tts_client.convert(text="Hello", references=param_refs, config=config) # 
Verify parameter references take precedence call_args = mock_client_wrapper.request.call_args @@ -181,7 +194,7 @@ def test_convert_with_different_backend(self, tts_client, mock_client_wrapper): mock_response.iter_bytes.return_value = iter([b"audio"]) mock_client_wrapper.request.return_value = mock_response - list(tts_client.convert(text="Hello", model="s1")) + tts_client.convert(text="Hello", model="s1") # Verify model in headers call_args = mock_client_wrapper.request.call_args @@ -196,7 +209,7 @@ def test_convert_with_prosody(self, tts_client, mock_client_wrapper): prosody = Prosody(speed=1.5, volume=0.5) config = TTSConfig(prosody=prosody) - list(tts_client.convert(text="Hello", config=config)) + tts_client.convert(text="Hello", config=config) # Verify prosody in payload call_args = mock_client_wrapper.request.call_args @@ -221,7 +234,7 @@ def test_convert_with_custom_parameters(self, tts_client, mock_client_wrapper): temperature=0.8, ) - list(tts_client.convert(text="Hello", config=config)) + tts_client.convert(text="Hello", config=config) # Verify parameters in payload call_args = mock_client_wrapper.request.call_args @@ -242,7 +255,7 @@ def test_convert_omit_parameters_not_sent(self, tts_client, mock_client_wrapper) mock_client_wrapper.request.return_value = mock_response # Call with defaults (None values should be excluded) - list(tts_client.convert(text="Hello")) + tts_client.convert(text="Hello") # Verify None params not in payload call_args = mock_client_wrapper.request.call_args @@ -266,14 +279,14 @@ def test_convert_with_request_options(self, tts_client, mock_client_wrapper): timeout=120.0, additional_headers={"X-Custom": "value"} ) - list(tts_client.convert(text="Hello", request_options=request_options)) + tts_client.convert(text="Hello", request_options=request_options) # Verify request_options passed through call_args = mock_client_wrapper.request.call_args assert call_args[1]["request_options"] == request_options - def 
test_convert_streaming_behavior(self, tts_client, mock_client_wrapper): - """Test that convert returns an iterator that can be consumed.""" + def test_stream_behavior(self, tts_client, mock_client_wrapper): + """Test that stream returns an iterator that can be consumed.""" # Setup mock with multiple chunks mock_response = Mock() chunks = [b"chunk1", b"chunk2", b"chunk3", b""] # Empty chunk should be skipped @@ -281,11 +294,11 @@ def test_convert_streaming_behavior(self, tts_client, mock_client_wrapper): mock_client_wrapper.request.return_value = mock_response # Get iterator - audio_iterator = tts_client.convert(text="Hello") + audio_stream = tts_client.stream(text="Hello") # Consume one chunk at a time result = [] - for chunk in audio_iterator: + for chunk in audio_stream: result.append(chunk) # Empty chunk should be filtered out @@ -297,9 +310,9 @@ def test_convert_empty_response(self, tts_client, mock_client_wrapper): mock_response.iter_bytes.return_value = iter([]) mock_client_wrapper.request.return_value = mock_response - audio_chunks = list(tts_client.convert(text="Hello")) + audio = tts_client.convert(text="Hello") - assert audio_chunks == [] + assert audio == b"" def test_convert_with_format_parameter(self, tts_client, mock_client_wrapper): """Test TTS with format as direct parameter.""" @@ -307,7 +320,7 @@ def test_convert_with_format_parameter(self, tts_client, mock_client_wrapper): mock_response.iter_bytes.return_value = iter([b"audio"]) mock_client_wrapper.request.return_value = mock_response - list(tts_client.convert(text="Hello", format="wav")) + tts_client.convert(text="Hello", format="wav") # Verify format in payload call_args = mock_client_wrapper.request.call_args @@ -320,7 +333,7 @@ def test_convert_with_opus_format(self, tts_client, mock_client_wrapper): mock_response.iter_bytes.return_value = iter([b"audio"]) mock_client_wrapper.request.return_value = mock_response - list(tts_client.convert(text="Hello", format="opus")) + 
tts_client.convert(text="Hello", format="opus") # Verify opus format in payload call_args = mock_client_wrapper.request.call_args @@ -333,7 +346,7 @@ def test_convert_with_latency_parameter(self, tts_client, mock_client_wrapper): mock_response.iter_bytes.return_value = iter([b"audio"]) mock_client_wrapper.request.return_value = mock_response - list(tts_client.convert(text="Hello", latency="normal")) + tts_client.convert(text="Hello", latency="normal") # Verify latency in payload call_args = mock_client_wrapper.request.call_args @@ -346,7 +359,7 @@ def test_convert_with_speed_parameter(self, tts_client, mock_client_wrapper): mock_response.iter_bytes.return_value = iter([b"audio"]) mock_client_wrapper.request.return_value = mock_response - list(tts_client.convert(text="Hello", speed=1.5)) + tts_client.convert(text="Hello", speed=1.5) # Verify speed creates prosody in payload call_args = mock_client_wrapper.request.call_args @@ -362,7 +375,7 @@ def test_convert_parameter_format_overrides_config( mock_client_wrapper.request.return_value = mock_response config = TTSConfig(format="wav") - list(tts_client.convert(text="Hello", format="pcm", config=config)) + tts_client.convert(text="Hello", format="pcm", config=config) # Verify parameter format takes precedence call_args = mock_client_wrapper.request.call_args @@ -378,7 +391,7 @@ def test_convert_parameter_speed_overrides_config_prosody( mock_client_wrapper.request.return_value = mock_response config = TTSConfig(prosody=Prosody(speed=2.0, volume=0.5)) - list(tts_client.convert(text="Hello", speed=1.5, config=config)) + tts_client.convert(text="Hello", speed=1.5, config=config) # Verify parameter speed takes precedence but volume is preserved call_args = mock_client_wrapper.request.call_args @@ -410,8 +423,8 @@ class TestAsyncTTSClient: """Test asynchronous AsyncTTSClient.""" @pytest.mark.asyncio - async def test_convert_basic(self, async_tts_client, async_mock_client_wrapper): - """Test basic async TTS conversion.""" + 
async def test_stream_basic(self, async_tts_client, async_mock_client_wrapper): + """Test basic async TTS streaming.""" # Setup mock response mock_response = Mock() @@ -422,9 +435,10 @@ async def async_iter_bytes(): mock_response.aiter_bytes = async_iter_bytes async_mock_client_wrapper.request = AsyncMock(return_value=mock_response) - # Call convert and collect chunks + # Call stream and collect chunks audio_chunks = [] - async for chunk in async_tts_client.convert(text="Hello world"): + stream = await async_tts_client.stream(text="Hello world") + async for chunk in stream: audio_chunks.append(chunk) assert audio_chunks == [b"chunk1", b"chunk2", b"chunk3"] @@ -436,6 +450,29 @@ async def async_iter_bytes(): assert call_args[0][0] == "POST" assert call_args[0][1] == "/v1/tts" + @pytest.mark.asyncio + async def test_convert_basic(self, async_tts_client, async_mock_client_wrapper): + """Test basic async TTS conversion returns bytes.""" + # Setup mock response + mock_response = Mock() + + async def async_iter_bytes(): + for chunk in [b"chunk1", b"chunk2", b"chunk3"]: + yield chunk + + mock_response.aiter_bytes = async_iter_bytes + async_mock_client_wrapper.request = AsyncMock(return_value=mock_response) + + # Call convert + audio = await async_tts_client.convert(text="Hello world") + + # Verify we got complete audio as bytes + assert audio == b"chunk1chunk2chunk3" + assert isinstance(audio, bytes) + + # Verify request was made + async_mock_client_wrapper.request.assert_called_once() + @pytest.mark.asyncio async def test_convert_with_reference_id( self, async_tts_client, async_mock_client_wrapper @@ -450,9 +487,7 @@ async def async_iter_bytes(): async_mock_client_wrapper.request = AsyncMock(return_value=mock_response) config = TTSConfig(reference_id="voice_123") - audio_chunks = [] - async for chunk in async_tts_client.convert(text="Hello", config=config): - audio_chunks.append(chunk) + await async_tts_client.convert(text="Hello", config=config) # Verify reference_id in 
payload call_args = async_mock_client_wrapper.request.call_args @@ -472,11 +507,7 @@ async def async_iter_bytes(): mock_response.aiter_bytes = async_iter_bytes async_mock_client_wrapper.request = AsyncMock(return_value=mock_response) - audio_chunks = [] - async for chunk in async_tts_client.convert( - text="Hello", reference_id="voice_456" - ): - audio_chunks.append(chunk) + await async_tts_client.convert(text="Hello", reference_id="voice_456") # Verify reference_id in payload call_args = async_mock_client_wrapper.request.call_args @@ -497,11 +528,9 @@ async def async_iter_bytes(): async_mock_client_wrapper.request = AsyncMock(return_value=mock_response) config = TTSConfig(reference_id="voice_from_config") - audio_chunks = [] - async for chunk in async_tts_client.convert( + await async_tts_client.convert( text="Hello", reference_id="voice_from_param", config=config - ): - audio_chunks.append(chunk) + ) # Verify parameter reference_id takes precedence call_args = async_mock_client_wrapper.request.call_args @@ -526,11 +555,7 @@ async def async_iter_bytes(): ReferenceAudio(audio=b"ref_audio_2", text="Sample 2"), ] - audio_chunks = [] - async for chunk in async_tts_client.convert( - text="Hello", references=references - ): - audio_chunks.append(chunk) + await async_tts_client.convert(text="Hello", references=references) # Verify references in payload call_args = async_mock_client_wrapper.request.call_args @@ -556,11 +581,9 @@ async def async_iter_bytes(): param_refs = [ReferenceAudio(audio=b"param_audio", text="Param")] config = TTSConfig(references=config_refs) - audio_chunks = [] - async for chunk in async_tts_client.convert( + await async_tts_client.convert( text="Hello", references=param_refs, config=config - ): - audio_chunks.append(chunk) + ) # Verify parameter references take precedence call_args = async_mock_client_wrapper.request.call_args @@ -584,9 +607,7 @@ async def async_iter_bytes(): prosody = Prosody(speed=2.0, volume=1.0) config = 
TTSConfig(prosody=prosody) - audio_chunks = [] - async for chunk in async_tts_client.convert(text="Hello", config=config): - audio_chunks.append(chunk) + await async_tts_client.convert(text="Hello", config=config) # Verify prosody in payload call_args = async_mock_client_wrapper.request.call_args @@ -607,9 +628,7 @@ async def async_iter_bytes(): mock_response.aiter_bytes = async_iter_bytes async_mock_client_wrapper.request = AsyncMock(return_value=mock_response) - audio_chunks = [] - async for chunk in async_tts_client.convert(text="Hello"): - audio_chunks.append(chunk) + await async_tts_client.convert(text="Hello") # Verify OMIT params not in payload call_args = async_mock_client_wrapper.request.call_args @@ -633,11 +652,9 @@ async def async_iter_bytes(): mock_response.aiter_bytes = async_iter_bytes async_mock_client_wrapper.request = AsyncMock(return_value=mock_response) - audio_chunks = [] - async for chunk in async_tts_client.convert(text="Hello"): - audio_chunks.append(chunk) + audio = await async_tts_client.convert(text="Hello") - assert audio_chunks == [] + assert audio == b"" @pytest.mark.asyncio async def test_convert_with_format_parameter( @@ -652,9 +669,7 @@ async def async_iter_bytes(): mock_response.aiter_bytes = async_iter_bytes async_mock_client_wrapper.request = AsyncMock(return_value=mock_response) - audio_chunks = [] - async for chunk in async_tts_client.convert(text="Hello", format="wav"): - audio_chunks.append(chunk) + await async_tts_client.convert(text="Hello", format="wav") # Verify format in payload call_args = async_mock_client_wrapper.request.call_args @@ -674,9 +689,7 @@ async def async_iter_bytes(): mock_response.aiter_bytes = async_iter_bytes async_mock_client_wrapper.request = AsyncMock(return_value=mock_response) - audio_chunks = [] - async for chunk in async_tts_client.convert(text="Hello", latency="normal"): - audio_chunks.append(chunk) + await async_tts_client.convert(text="Hello", latency="normal") # Verify latency in payload 
call_args = async_mock_client_wrapper.request.call_args @@ -696,9 +709,7 @@ async def async_iter_bytes(): mock_response.aiter_bytes = async_iter_bytes async_mock_client_wrapper.request = AsyncMock(return_value=mock_response) - audio_chunks = [] - async for chunk in async_tts_client.convert(text="Hello", speed=1.5): - audio_chunks.append(chunk) + await async_tts_client.convert(text="Hello", speed=1.5) # Verify speed creates prosody in payload call_args = async_mock_client_wrapper.request.call_args @@ -719,11 +730,7 @@ async def async_iter_bytes(): async_mock_client_wrapper.request = AsyncMock(return_value=mock_response) config = TTSConfig(format="wav") - audio_chunks = [] - async for chunk in async_tts_client.convert( - text="Hello", format="pcm", config=config - ): - audio_chunks.append(chunk) + await async_tts_client.convert(text="Hello", format="pcm", config=config) # Verify parameter format takes precedence call_args = async_mock_client_wrapper.request.call_args @@ -744,11 +751,7 @@ async def async_iter_bytes(): async_mock_client_wrapper.request = AsyncMock(return_value=mock_response) config = TTSConfig(prosody=Prosody(speed=2.0, volume=0.5)) - audio_chunks = [] - async for chunk in async_tts_client.convert( - text="Hello", speed=1.5, config=config - ): - audio_chunks.append(chunk) + await async_tts_client.convert(text="Hello", speed=1.5, config=config) # Verify parameter speed takes precedence but volume is preserved call_args = async_mock_client_wrapper.request.call_args @@ -769,11 +772,9 @@ async def async_iter_bytes(): mock_response.aiter_bytes = async_iter_bytes async_mock_client_wrapper.request = AsyncMock(return_value=mock_response) - audio_chunks = [] - async for chunk in async_tts_client.convert( + await async_tts_client.convert( text="Hello", format="wav", speed=1.3, latency="normal" - ): - audio_chunks.append(chunk) + ) # Verify all parameters in payload call_args = async_mock_client_wrapper.request.call_args