> ## Documentation Index
> Fetch the complete documentation index at: https://hanabiaiinc-auto-go-api-docs.mintlify.site/llms.txt
> Use this file to discover all available pages before exploring further.

# Text-to-Speech

> Generate natural-sounding speech with the Fish Audio Python SDK

export const AudioTranscript = ({voices = []}) => {
  const [selectedVoice, setSelectedVoice] = useState(0);
  const [isPlaying, setIsPlaying] = useState(false);
  const [currentTime, setCurrentTime] = useState(0);
  const [duration, setDuration] = useState(0);
  const [isDropdownOpen, setIsDropdownOpen] = useState(false);
  const audioRef = useRef(null);
  const dropdownRef = useRef(null);
  useEffect(() => {
    const audio = audioRef.current;
    if (!audio) return;
    const updateTime = () => setCurrentTime(audio.currentTime);
    const updateDuration = () => setDuration(audio.duration);
    const handleEnded = () => setIsPlaying(false);
    audio.addEventListener('timeupdate', updateTime);
    audio.addEventListener('loadedmetadata', updateDuration);
    audio.addEventListener('ended', handleEnded);
    return () => {
      audio.removeEventListener('timeupdate', updateTime);
      audio.removeEventListener('loadedmetadata', updateDuration);
      audio.removeEventListener('ended', handleEnded);
    };
  }, []);
  useEffect(() => {
    const handleClickOutside = event => {
      if (dropdownRef.current && !dropdownRef.current.contains(event.target)) {
        setIsDropdownOpen(false);
      }
    };
    if (isDropdownOpen) {
      document.addEventListener('mousedown', handleClickOutside);
    }
    return () => {
      document.removeEventListener('mousedown', handleClickOutside);
    };
  }, [isDropdownOpen]);
  useEffect(() => {
    if (audioRef.current) {
      audioRef.current.pause();
      audioRef.current.load();
      setIsPlaying(false);
      setCurrentTime(0);
    }
  }, [selectedVoice]);
  const togglePlay = () => {
    if (isPlaying) {
      audioRef.current.pause();
    } else {
      audioRef.current.play();
    }
    setIsPlaying(!isPlaying);
  };
  const handleProgressChange = e => {
    const newTime = parseFloat(e.target.value);
    audioRef.current.currentTime = newTime;
    setCurrentTime(newTime);
  };
  const formatTime = time => {
    if (isNaN(time)) return '0:00';
    const minutes = Math.floor(time / 60);
    const seconds = Math.floor(time % 60);
    return `${minutes}:${seconds.toString().padStart(2, '0')}`;
  };
  const currentVoice = voices[selectedVoice];
  return <div className="border rounded-lg bg-card border-gray-200 dark:border-gray-800">
      {}
      <div className="grid grid-cols-3 items-center px-3 py-1.5 bg-muted border-b border-gray-200 dark:border-gray-800">
        <span className="text-xs font-medium">Listen to Page</span>

        <span className="text-xs font-semibold text-muted-foreground text-center">Powered by Fish Audio S1</span>

        {voices.length > 1 ? <div className="relative justify-self-end" ref={dropdownRef}>
            <button onClick={() => setIsDropdownOpen(!isDropdownOpen)} className="flex items-center gap-1.5 px-3 py-1 rounded-full bg-muted hover:bg-gray-200 dark:hover:bg-gray-700 transition-all duration-200 cursor-pointer text-xs">
              <span className="text-muted-foreground">Voice:</span>
              <span className="font-medium">{voices[selectedVoice]?.name}</span>
              <svg className={`w-3 h-3 transition-transform duration-200 ${isDropdownOpen ? 'rotate-180' : ''}`} fill="none" stroke="currentColor" viewBox="0 0 24 24">
                <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 9l-7 7-7-7" />
              </svg>
            </button>

            {isDropdownOpen && <div className="absolute right-0 mt-1 w-auto bg-white dark:bg-black border border-gray-200 dark:border-gray-700 rounded-lg overflow-hidden z-50">
                {voices.map((voice, index) => <button key={index} onClick={() => {
    setSelectedVoice(index);
    setIsDropdownOpen(false);
  }} className={`w-full px-3 py-1.5 text-left text-xs hover:bg-gray-100 dark:hover:bg-gray-800 transition-colors flex items-center gap-2 ${index === selectedVoice ? 'bg-gray-100 dark:bg-gray-800 font-medium' : ''}`}>
                    {voice.id && <img src={`https://public-platform.r2.fish.audio/coverimage/${voice.id}`} alt={voice.name} className="w-5 h-5 rounded-full m-0 flex-shrink-0 object-cover" />}
                    <span className="flex-1 whitespace-nowrap">{voice.name}</span>
                  </button>)}
              </div>}
          </div> : <div className="justify-self-end" />}
      </div>

      {}
      <div className="px-3 py-1.5 bg-card">
        <audio ref={audioRef} src={currentVoice?.url} preload="metadata" />

        <div className="flex items-center gap-2">
          {}
          <button onClick={togglePlay} className="flex-shrink-0 w-6 h-6 flex items-center justify-center bg-gray-300 dark:bg-gray-600 text-gray-800 dark:text-gray-200 rounded-full hover:opacity-80 transition-opacity relative overflow-hidden" aria-label={isPlaying ? 'Pause' : 'Play'}>
            <div className="transition-transform duration-300 ease-in-out" style={{
    transform: isPlaying ? 'rotate(180deg)' : 'rotate(0deg)'
  }}>
              {isPlaying ? <svg className="w-3 h-3" fill="currentColor" viewBox="0 0 24 24">
                  <path d="M6 4h4v16H6V4zm8 0h4v16h-4V4z" />
                </svg> : <svg className="w-3 h-3 ml-0.5" fill="currentColor" viewBox="0 0 24 24">
                  <path d="M8 5v14l11-7z" />
                </svg>}
            </div>
          </button>

          {}
          <div className="flex-1 flex items-center gap-2">
            <span className="text-xs font-mono text-gray-500 dark:text-gray-400 min-w-[35px]">
              {formatTime(currentTime)}
            </span>

            <div className="flex-1 relative h-1 bg-gray-200 dark:bg-gray-700 rounded-full overflow-hidden">
              <div className="absolute top-0 left-0 h-full bg-gray-400 dark:bg-gray-500 transition-all duration-100" style={{
    width: `${duration ? currentTime / duration * 100 : 0}%`
  }} />
              <input type="range" min="0" max={duration || 0} value={currentTime} onChange={handleProgressChange} className="absolute top-0 left-0 w-full h-full opacity-0 cursor-pointer" />
            </div>
            <span className="text-xs font-mono text-gray-500 dark:text-gray-400 min-w-[35px]">
              {formatTime(duration)}
            </span>
          </div>
        </div>
      </div>
    </div>;
};

<AudioTranscript
  voices={[
{
  "id": "8ef4a238714b45718ce04243307c57a7",
  "name": "E-girl",
  "url": "https://pub-b995142090474379a930b856ab79b4d4.r2.dev/audio/python-text-to-speech/8ef4a238714b45718ce04243307c57a7.mp3"
},
{
  "id": "802e3bc2b27e49c2995d23ef70e6ac89",
  "name": "Energetic Male",
  "url": "https://pub-b995142090474379a930b856ab79b4d4.r2.dev/audio/python-text-to-speech/802e3bc2b27e49c2995d23ef70e6ac89.mp3"
},
{
  "id": "933563129e564b19a115bedd57b7406a",
  "name": "Sarah",
  "url": "https://pub-b995142090474379a930b856ab79b4d4.r2.dev/audio/python-text-to-speech/933563129e564b19a115bedd57b7406a.mp3"
},
{
  "id": "bf322df2096a46f18c579d0baa36f41d",
  "name": "Adrian",
  "url": "https://pub-b995142090474379a930b856ab79b4d4.r2.dev/audio/python-text-to-speech/bf322df2096a46f18c579d0baa36f41d.mp3"
},
{
  "id": "b347db033a6549378b48d00acb0d06cd",
  "name": "Selene",
  "url": "https://pub-b995142090474379a930b856ab79b4d4.r2.dev/audio/python-text-to-speech/b347db033a6549378b48d00acb0d06cd.mp3"
},
{
  "id": "536d3a5e000945adb7038665781a4aca",
  "name": "Ethan",
  "url": "https://pub-b995142090474379a930b856ab79b4d4.r2.dev/audio/python-text-to-speech/536d3a5e000945adb7038665781a4aca.mp3"
}
]}
/>

## Prerequisites

<AccordionGroup>
  <Accordion icon="user-plus" title="Create a Fish Audio account">
    Sign up for a free Fish Audio account to get started with our API.

    1. Go to [fish.audio/auth/signup](https://fish.audio/auth/signup)
    2. Fill in your details to create an account, then complete the steps to verify it.
    3. Log in to your account and navigate to the [API section](https://fish.audio/app/api-keys)
  </Accordion>

  <Accordion icon="key" title="Get your API key">
    Once you have an account, you'll need an API key to authenticate your requests.

    1. Log in to your [Fish Audio Dashboard](https://fish.audio/app/api-keys/)
    2. Navigate to the API Keys section
    3. Click "Create New Key" and give it a descriptive name, set an expiration if desired
    4. Copy your key and store it securely

    <Warning>Keep your API key secret! Never commit it to version control or share it publicly.</Warning>
  </Accordion>
</AccordionGroup>

## Understanding TTS Methods

The SDK provides three methods for text-to-speech generation, each optimized for different use cases:

| Method                                                                       | Returns              | Best For                                                                 |
| ---------------------------------------------------------------------------- | -------------------- | ------------------------------------------------------------------------ |
| [`convert()`](/api-reference/sdk/python/resources#convert)                   | Complete audio bytes | Most use cases - simple, gets full audio at once                         |
| [`stream()`](/api-reference/sdk/python/resources#stream)                     | `AudioStream`        | Chunk-by-chunk processing, memory-efficient transfer                     |
| [`stream_websocket()`](/api-reference/sdk/python/resources#stream_websocket) | Audio bytes iterator | Real-time streaming with dynamic text (LLM responses, conversational AI) |

<Tip>
  Use `convert()` for most use cases. Use `stream()` for memory efficiency when handling large files. Use `stream_websocket()` when text is generated dynamically in real-time.
</Tip>

## Basic Usage

Generate speech from text with a single function call:

<CodeGroup>
  ```python Synchronous focus={6-9} theme={null}
  from fishaudio import FishAudio
  from fishaudio.utils import save, play

  client = FishAudio()

  # Generate speech (returns bytes)
  audio = client.tts.convert(text="Hello, welcome to Fish Audio!")

  # Play or save the audio
  play(audio)
  save(audio, "output.mp3")
  ```

  ```python Asynchronous focus={8-11} theme={null}
  import asyncio
  from fishaudio import AsyncFishAudio
  from fishaudio.utils import save, play

  async def main():
      client = AsyncFishAudio()

      # Generate speech (returns bytes)
      audio = await client.tts.convert(text="Hello, welcome to Fish Audio!")

      # Play or save the audio
      play(audio)
      save(audio, "output.mp3")

  asyncio.run(main())
  ```
</CodeGroup>

## Using Voice Models

Specify a voice model for consistent voice characteristics:

<CodeGroup>
  ```python Synchronous focus={6-10} theme={null}
  from fishaudio import FishAudio
  from fishaudio.utils import play

  client = FishAudio()

  # Use a specific voice
  audio = client.tts.convert(
      text="This uses a specific voice model",
      reference_id="bf322df2096a46f18c579d0baa36f41d"  # Adrian
  )
  play(audio)
  ```

  ```python Asynchronous focus={8-12} theme={null}
  import asyncio
  from fishaudio import AsyncFishAudio
  from fishaudio.utils import play

  async def main():
      client = AsyncFishAudio()

      # Use a specific voice
      audio = await client.tts.convert(
          text="This uses a specific voice model",
          reference_id="bf322df2096a46f18c579d0baa36f41d"  # Adrian
      )
      play(audio)

  asyncio.run(main())
  ```
</CodeGroup>

### Finding Voice Models

Get voice model IDs from the Fish Audio website or programmatically:

<CodeGroup>
  ```python Synchronous focus={5-16} theme={null}
  from fishaudio import FishAudio
  from fishaudio.utils import play

  client = FishAudio()

  # List available voices
  voices = client.voices.list(language="en", tags="male")

  for voice in voices.items:
      print(f"{voice.title}: {voice.id}")

  # Use a voice from the list
  audio = client.tts.convert(
      text="Generated with discovered voice",
      reference_id=voices.items[0].id
  )
  play(audio)
  ```

  ```python Asynchronous focus={7-18} theme={null}
  import asyncio
  from fishaudio import AsyncFishAudio
  from fishaudio.utils import play

  async def main():
      client = AsyncFishAudio()

      # List available voices
      voices = await client.voices.list(language="en", tags="male")

      for voice in voices.items:
          print(f"{voice.title}: {voice.id}")

      # Use a voice from the list
      audio = await client.tts.convert(
          text="Generated with discovered voice",
          reference_id=voices.items[0].id
      )
      play(audio)

  asyncio.run(main())
  ```
</CodeGroup>

Learn more in the [Voice Cloning guide](/developer-guide/sdk-guide/python/voice-cloning).

## Emotions and Expressions

Add emotional expressions to make speech more natural:

<CodeGroup>
  ```python Synchronous focus={5-16} theme={null}
  from fishaudio import FishAudio
  from fishaudio.utils import play

  client = FishAudio()

  text = """
  (happy) I'm excited to announce this!
  (sad) Unfortunately, it didn't work out.
  (angry) This is so frustrating!
  (calm) Let me explain the details.
  """

  audio = client.tts.convert(
      text=text,
      reference_id="933563129e564b19a115bedd57b7406a"  # Sarah
  )
  play(audio)
  ```

  ```python Asynchronous focus={7-18} theme={null}
  import asyncio
  from fishaudio import AsyncFishAudio
  from fishaudio.utils import play

  async def main():
      client = AsyncFishAudio()

      text = """
      (happy) I'm excited to announce this!
      (sad) Unfortunately, it didn't work out.
      (angry) This is so frustrating!
      (calm) Let me explain the details.
      """

      audio = await client.tts.convert(
          text=text,
          reference_id="933563129e564b19a115bedd57b7406a"  # Sarah
      )
      play(audio)

  asyncio.run(main())
  ```
</CodeGroup>

See the [Emotion Reference](/api-reference/emotion-reference) for all available emotions and [Fine-grained Control](/developer-guide/core-features/fine-grained-control) for advanced usage.

## Audio Formats

Choose the output format based on your needs:

<CodeGroup>
  ```python Synchronous focus={5-21} theme={null}
  from fishaudio import FishAudio

  client = FishAudio()

  # MP3 (default) - good balance of quality and size
  audio = client.tts.convert(
      text="MP3 format",
      format="mp3"
  )

  # WAV - uncompressed, highest quality
  audio = client.tts.convert(
      text="WAV format",
      format="wav"
  )

  # PCM - raw audio data for streaming
  audio = client.tts.convert(
      text="PCM format",
      format="pcm"
  )
  ```

  ```python Asynchronous focus={7-23} theme={null}
  import asyncio
  from fishaudio import AsyncFishAudio

  async def main():
      client = AsyncFishAudio()

      # MP3 (default) - good balance of quality and size
      audio = await client.tts.convert(
          text="MP3 format",
          format="mp3"
      )

      # WAV - uncompressed, highest quality
      audio = await client.tts.convert(
          text="WAV format",
          format="wav"
      )

      # PCM - raw audio data for streaming
      audio = await client.tts.convert(
          text="PCM format",
          format="pcm"
      )

  asyncio.run(main())
  ```
</CodeGroup>

## Prosody Control

Adjust speech speed and volume for natural-sounding output:

<CodeGroup>
  ```python Synchronous focus={6-10} theme={null}
  from fishaudio import FishAudio
  from fishaudio.utils import play

  client = FishAudio()

  # Simple speed adjustment
  audio = client.tts.convert(
      text="This will be spoken faster",
      speed=1.5  # 1.5x speed (range: 0.5-2.0)
  )
  play(audio)
  ```

  ```python Asynchronous focus={8-12} theme={null}
  import asyncio
  from fishaudio import AsyncFishAudio
  from fishaudio.utils import play

  async def main():
      client = AsyncFishAudio()

      # Simple speed adjustment
      audio = await client.tts.convert(
          text="This will be spoken faster",
          speed=1.5  # 1.5x speed (range: 0.5-2.0)
      )
      play(audio)

  asyncio.run(main())
  ```
</CodeGroup>

For combined speed and volume control, use [`TTSConfig`](/api-reference/sdk/python/types#ttsconfig-objects) with [`Prosody`](/api-reference/sdk/python/types#prosody-objects):

<CodeGroup>
  ```python Synchronous focus={7-17} theme={null}
  from fishaudio import FishAudio
  from fishaudio.types import TTSConfig, Prosody
  from fishaudio.utils import play

  client = FishAudio()

  # Configure prosody with TTSConfig
  audio = client.tts.convert(
      text="Adjusted speech with custom speed and volume",
      config=TTSConfig(
          prosody=Prosody(
              speed=1.2,   # 20% faster
              volume=5     # Louder (range: -20 to 20)
          )
      )
  )
  play(audio)
  ```

  ```python Asynchronous focus={9-19} theme={null}
  import asyncio
  from fishaudio import AsyncFishAudio
  from fishaudio.types import TTSConfig, Prosody
  from fishaudio.utils import play

  async def main():
      client = AsyncFishAudio()

      # Configure prosody with TTSConfig
      audio = await client.tts.convert(
          text="Adjusted speech with custom speed and volume",
          config=TTSConfig(
              prosody=Prosody(
                  speed=1.2,   # 20% faster
                  volume=5     # Louder (range: -20 to 20)
              )
          )
      )
      play(audio)

  asyncio.run(main())
  ```
</CodeGroup>

## Reusable TTS Configuration

Create a configuration once and reuse it across multiple generations:

<CodeGroup>
  ```python Synchronous focus={5-18} theme={null}
  from fishaudio import FishAudio
  from fishaudio.types import TTSConfig, Prosody

  client = FishAudio()

  # Define config once
  my_config = TTSConfig(
      prosody=Prosody(speed=1.2, volume=-5),
      reference_id="bf322df2096a46f18c579d0baa36f41d",  # Adrian
      format="wav",
      latency="balanced"
  )

  # Reuse across multiple generations
  audio1 = client.tts.convert(text="Welcome to our product demonstration.", config=my_config)
  audio2 = client.tts.convert(text="Let me show you the key features.", config=my_config)
  audio3 = client.tts.convert(text="Thank you for watching this tutorial.", config=my_config)
  ```

  ```python Asynchronous focus={7-20} theme={null}
  import asyncio
  from fishaudio import AsyncFishAudio
  from fishaudio.types import TTSConfig, Prosody

  async def main():
      client = AsyncFishAudio()

      # Define config once
      my_config = TTSConfig(
          prosody=Prosody(speed=1.2, volume=-5),
          reference_id="bf322df2096a46f18c579d0baa36f41d",  # Adrian
          format="wav",
          latency="balanced"
      )

      # Reuse across multiple generations
      audio1 = await client.tts.convert(text="Welcome to our product demonstration.", config=my_config)
      audio2 = await client.tts.convert(text="Let me show you the key features.", config=my_config)
      audio3 = await client.tts.convert(text="Thank you for watching this tutorial.", config=my_config)

  asyncio.run(main())
  ```
</CodeGroup>

## Chunk-by-Chunk Streaming

Use `stream()` for memory-efficient transfer and progressive download. Chunks are network transmission units (not semantic audio segments):

<CodeGroup>
  ```python Synchronous focus={5-8} theme={null}
  from fishaudio import FishAudio

  client = FishAudio()

  # Collect all chunks efficiently
  audio_stream = client.tts.stream(text="Long text here")
  audio = audio_stream.collect()  # Returns complete audio as bytes
  ```

  ```python Asynchronous focus={7-10} theme={null}
  import asyncio
  from fishaudio import AsyncFishAudio

  async def main():
      client = AsyncFishAudio()

      # Collect all chunks efficiently
      audio_stream = await client.tts.stream(text="Long text here")
      audio = await audio_stream.collect()  # Returns complete audio as bytes

  asyncio.run(main())
  ```
</CodeGroup>

For streaming to files or network without buffering in memory:

<CodeGroup>
  ```python Synchronous focus={5-9} theme={null}
  from fishaudio import FishAudio

  client = FishAudio()

  # Stream directly to file (memory efficient for large audio)
  audio_stream = client.tts.stream(text="Very long text...")
  with open("output.mp3", "wb") as f:
      for chunk in audio_stream:
          f.write(chunk)  # Write each chunk as it arrives
  ```

  ```python Asynchronous focus={7-11} theme={null}
  import asyncio
  from fishaudio import AsyncFishAudio

  async def main():
      client = AsyncFishAudio()

      # Stream directly to file (memory efficient for large audio)
      audio_stream = await client.tts.stream(text="Very long text...")
      with open("output.mp3", "wb") as f:
          async for chunk in audio_stream:
              f.write(chunk)  # Write each chunk as it arrives

  asyncio.run(main())
  ```
</CodeGroup>

<Note>
  Use `stream()` when you have complete text upfront. For real-time streaming with dynamically generated text (LLMs, live captions), use `stream_websocket()` instead.
</Note>

## Real-time WebSocket Streaming

For real-time applications where text is generated dynamically, use [`stream_websocket()`](/api-reference/sdk/python/resources#stream_websocket). This is perfect for LLM integrations, conversational AI, and live captions.

### Basic WebSocket Streaming

<CodeGroup>
  ```python Synchronous focus={5-15} theme={null}
  from fishaudio import FishAudio
  from fishaudio.utils import play

  client = FishAudio()

  # Stream dynamically generated text
  def text_chunks():
      yield "Hello, "
      yield "this is "
      yield "streaming text!"

  audio_stream = client.tts.stream_websocket(
      text_chunks(),
      latency="balanced"
  )

  play(audio_stream)
  ```

  ```python Asynchronous focus={7-16} theme={null}
  import asyncio
  from fishaudio import AsyncFishAudio
  from fishaudio.utils import play

  async def main():
      client = AsyncFishAudio()

      # Stream dynamically generated text
      async def text_chunks():
          yield "Hello, "
          yield "this is "
          yield "streaming text!"

      audio_stream = await client.tts.stream_websocket(
          text_chunks(),
          latency="balanced"
      )

      play(audio_stream)

  asyncio.run(main())
  ```
</CodeGroup>

### Understanding `FlushEvent`

The [`FlushEvent`](/api-reference/sdk/python/types#flushevent-objects) forces the TTS engine to immediately generate audio from the accumulated text buffer. This is useful when you want to ensure audio is generated at specific points, even if the buffer hasn't reached the optimal chunk size.

<CodeGroup>
  ```python Synchronous focus={6-18} theme={null}
  from fishaudio import FishAudio
  from fishaudio.types import FlushEvent

  client = FishAudio()

  # Use FlushEvent to force immediate generation
  def text_with_flush():
      yield "This is the first sentence. "
      yield "This is the second sentence. "
      yield FlushEvent()  # Force audio generation NOW
      yield "This starts a new segment. "
      yield "And continues here."
      yield FlushEvent()  # Force final generation

  audio_stream = client.tts.stream_websocket(text_with_flush())

  # Process each audio chunk as it arrives
  for chunk in audio_stream:
      print(f"Received audio chunk: {len(chunk)} bytes")
  ```

  ```python Asynchronous focus={8-20} theme={null}
  import asyncio
  from fishaudio import AsyncFishAudio
  from fishaudio.types import FlushEvent

  async def main():
      client = AsyncFishAudio()

      # Use FlushEvent to force immediate generation
      async def text_with_flush():
          yield "This is the first sentence. "
          yield "This is the second sentence. "
          yield FlushEvent()  # Force audio generation NOW
          yield "This starts a new segment. "
          yield "And continues here."
          yield FlushEvent()  # Force final generation

      audio_stream = await client.tts.stream_websocket(text_with_flush())

      # Process each audio chunk as it arrives
      async for chunk in audio_stream:
          print(f"Received audio chunk: {len(chunk)} bytes")

  asyncio.run(main())
  ```
</CodeGroup>

<Tip>
  Without `FlushEvent`, the engine automatically generates audio when the buffer reaches an optimal size. Use `FlushEvent` to control exactly when audio should be generated, which can reduce perceived latency in interactive applications.
</Tip>

### `TextEvent` vs Plain Strings

You can yield plain strings (recommended for simplicity) or use [`TextEvent`](/api-reference/sdk/python/types#textevent-objects) for explicit control:

<CodeGroup>
  ```python Synchronous focus={6-17} theme={null}
  from fishaudio import FishAudio
  from fishaudio.types import TextEvent

  client = FishAudio()

  # Both approaches are equivalent
  def text_as_strings():
      yield "Hello, "
      yield "world!"

  def text_as_events():
      yield TextEvent(text="Hello, ")
      yield TextEvent(text="world!")

  # Use whichever style you prefer
  audio1 = client.tts.stream_websocket(text_as_strings())
  audio2 = client.tts.stream_websocket(text_as_events())
  ```

  ```python Asynchronous focus={8-19} theme={null}
  import asyncio
  from fishaudio import AsyncFishAudio
  from fishaudio.types import TextEvent

  async def main():
      client = AsyncFishAudio()

      # Both approaches are equivalent
      async def text_as_strings():
          yield "Hello, "
          yield "world!"

      async def text_as_events():
          yield TextEvent(text="Hello, ")
          yield TextEvent(text="world!")

      # Use whichever style you prefer
      audio1 = await client.tts.stream_websocket(text_as_strings())
      audio2 = await client.tts.stream_websocket(text_as_events())

  asyncio.run(main())
  ```
</CodeGroup>

### LLM Integration Pattern

WebSocket streaming shines when integrating with LLM streaming responses. The TTS engine acts as an accumulator, buffering text until it has enough to generate natural-sounding audio:

<CodeGroup>
  ```python Synchronous focus={5-19} theme={null}
  from fishaudio import FishAudio
  from fishaudio.utils import play

  client = FishAudio()

  # Simulate streaming LLM response
  def llm_stream():
      """Simulates text chunks from an LLM"""
      tokens = [
          "The ", "weather ", "today ", "is ", "sunny ",
          "with ", "clear ", "skies. ", "Perfect ",
          "for ", "outdoor ", "activities!"
      ]
      for token in tokens:
          yield token

  # Stream to speech in real-time
  audio_stream = client.tts.stream_websocket(llm_stream())
  play(audio_stream)
  ```

  ```python Asynchronous focus={7-21} theme={null}
  import asyncio
  from fishaudio import AsyncFishAudio
  from fishaudio.utils import play

  async def main():
      client = AsyncFishAudio()

      # Simulate streaming LLM response
      async def llm_stream():
          """Simulates text chunks from an LLM"""
          tokens = [
              "The ", "weather ", "today ", "is ", "sunny ",
              "with ", "clear ", "skies. ", "Perfect ",
              "for ", "outdoor ", "activities!"
          ]
          for token in tokens:
              yield token

      # Stream to speech in real-time
      audio_stream = await client.tts.stream_websocket(llm_stream())
      play(audio_stream)

  asyncio.run(main())
  ```
</CodeGroup>

<Note>
  The WebSocket connection automatically buffers incoming text and generates audio when it has accumulated enough context for natural-sounding speech. You don't need to manually batch tokens unless you want to force generation at specific points using `FlushEvent`.
</Note>

Learn more in the [WebSocket Streaming guide](/developer-guide/sdk-guide/python/websocket).

## Advanced Configuration

Comprehensive `TTSConfig` with all available parameters:

```python focus={3-24} theme={null}
from fishaudio.types import TTSConfig, Prosody

# All TTSConfig parameters
config = TTSConfig(
    # Audio output settings
    format="mp3",
    sample_rate=44100,         # Custom sample rate (optional)
    mp3_bitrate=192,           # 64, 128, or 192 kbps
    opus_bitrate=64,           # For Opus format: -1000, 24, 32, 48, or 64
    normalize=True,            # Normalize audio levels

    # Generation settings
    chunk_length=200,          # Characters per chunk (100-300)
    latency="balanced",        # "normal" or "balanced"

    # Voice/style settings
    reference_id="bf322df2096a46f18c579d0baa36f41d",  # Adrian
    prosody=Prosody(speed=1.1, volume=0),
    # references=[ReferenceAudio(...)]  # For instant cloning

    # Model parameters
    temperature=0.7,           # Randomness (0.0-1.0)
    top_p=0.7                  # Token selection (0.0-1.0)
)

# Use with any client
audio = client.tts.convert(text="Your text here", config=config)
```

<Tip>
  `TTSConfig` works the same for both sync and async clients. See [TTSConfig API Reference](/api-reference/sdk/python/types#ttsconfig-objects) for detailed documentation on each parameter and their defaults.
</Tip>

## Error Handling

Handle common TTS errors gracefully:

```python theme={null}
from fishaudio import FishAudio
from fishaudio.exceptions import (
    RateLimitError,
    ValidationError,
    NotFoundError,
    FishAudioError
)
import time

client = FishAudio()

try:
    audio = client.tts.convert(
        text="Your text here",
        reference_id="voice_id"
    )
except RateLimitError:
    print("Rate limit exceeded. Please wait before retrying.")
    time.sleep(60)  # Wait before retry
except NotFoundError:
    print("Voice model not found. Check the reference_id")
except ValidationError as e:
    print(f"Invalid request: {e}")
except FishAudioError as e:
    print(f"API error: {e}")
```

Common exceptions include [`RateLimitError`](/api-reference/sdk/python/exceptions#ratelimiterror-objects), [`ValidationError`](/api-reference/sdk/python/exceptions#validationerror-objects), [`NotFoundError`](/api-reference/sdk/python/exceptions#notfounderror-objects), and [`FishAudioError`](/api-reference/sdk/python/exceptions#fishaudioerror-objects).

## Best Practices

<AccordionGroup>
  <Accordion title="Chunk long text appropriately">
    For long texts, adjust `chunk_length` in `TTSConfig`:

    ```python theme={null}
    from fishaudio import FishAudio
    from fishaudio.types import TTSConfig

    client = FishAudio()

    audio = client.tts.convert(
        text="Very long text...",
        config=TTSConfig(chunk_length=250)  # Larger chunks for efficiency
    )
    ```
  </Accordion>

  <Accordion title="Cache frequently used audio">
    If you generate the same speech repeatedly, cache the results:

    ```python theme={null}
    import os
    from fishaudio import FishAudio
    from fishaudio.utils import save

    client = FishAudio()

    def get_or_generate_speech(text, cache_file):
        if os.path.exists(cache_file):
            with open(cache_file, "rb") as f:
                return f.read()

        audio = client.tts.convert(text=text)
        save(audio, cache_file)
        return audio
    ```
  </Accordion>

  <Accordion title="Handle rate limits gracefully">
    Implement exponential backoff for rate limits:

    ```python theme={null}
    from fishaudio import FishAudio
    from fishaudio.exceptions import RateLimitError
    import time

    client = FishAudio()

    def generate_with_retry(text, max_retries=3):
        """Convert text to speech, retrying with exponential backoff on rate limits.

        Sleeps 2 ** attempt seconds (1s, 2s, 4s, ...) after each rate-limited
        attempt and re-raises the RateLimitError once the final attempt fails.
        """
        for attempt in range(max_retries):
            try:
                return client.tts.convert(text=text)
            except RateLimitError:
                if attempt < max_retries - 1:
                    time.sleep(2 ** attempt)  # Exponential backoff
                else:
                    raise
    ```
  </Accordion>

  <Accordion title="Use appropriate latency modes">
    Balance speed vs quality based on your use case:

    ```python theme={null}
    from fishaudio import FishAudio

    client = FishAudio()

    # For real-time applications
    audio = client.tts.convert(text="Fast response", latency="balanced")

    # For highest quality
    audio = client.tts.convert(text="Best quality", latency="normal")
    ```
  </Accordion>
</AccordionGroup>

## Next Steps

<CardGroup cols={2}>
  <Card title="Voice Cloning" icon="clone" href="/developer-guide/sdk-guide/python/voice-cloning">
    Create custom voice models
  </Card>

  <Card title="WebSocket Streaming" icon="bolt" href="/developer-guide/sdk-guide/python/websocket">
    Real-time audio streaming
  </Card>

  <Card title="Fine-grained Control" icon="sliders" href="/developer-guide/core-features/fine-grained-control">
    Phoneme-level control and paralanguage
  </Card>

  <Card title="Best Practices" icon="lightbulb" href="/developer-guide/best-practices/">
    Production tips and optimization
  </Card>
</CardGroup>

## Related Resources

* [TTS API Reference](/api-reference/sdk/python/resources#tts) - Complete API documentation
* [Audio Formats Guide](/developer-guide/core-features/text-to-speech#audio-formats) - Format comparison
* [Emotion Reference](/api-reference/emotion-reference) - All available emotions
* [Utils Reference](/api-reference/sdk/python/utils) - Audio utilities
