> ## Documentation Index
> Fetch the complete documentation index at: https://hanabiaiinc-auto-go-api-docs.mintlify.site/llms.txt
> Use this file to discover all available pages before exploring further.

# Text to Speech

> Convert text to natural speech with Fish Audio JavaScript SDK

export const AudioTranscript = ({voices = []}) => {
  const [selectedVoice, setSelectedVoice] = useState(0);
  const [isPlaying, setIsPlaying] = useState(false);
  const [currentTime, setCurrentTime] = useState(0);
  const [duration, setDuration] = useState(0);
  const [isDropdownOpen, setIsDropdownOpen] = useState(false);
  const audioRef = useRef(null);
  const dropdownRef = useRef(null);
  useEffect(() => {
    const audio = audioRef.current;
    if (!audio) return;
    const updateTime = () => setCurrentTime(audio.currentTime);
    const updateDuration = () => setDuration(audio.duration);
    const handleEnded = () => setIsPlaying(false);
    audio.addEventListener('timeupdate', updateTime);
    audio.addEventListener('loadedmetadata', updateDuration);
    audio.addEventListener('ended', handleEnded);
    return () => {
      audio.removeEventListener('timeupdate', updateTime);
      audio.removeEventListener('loadedmetadata', updateDuration);
      audio.removeEventListener('ended', handleEnded);
    };
  }, []);
  useEffect(() => {
    const handleClickOutside = event => {
      if (dropdownRef.current && !dropdownRef.current.contains(event.target)) {
        setIsDropdownOpen(false);
      }
    };
    if (isDropdownOpen) {
      document.addEventListener('mousedown', handleClickOutside);
    }
    return () => {
      document.removeEventListener('mousedown', handleClickOutside);
    };
  }, [isDropdownOpen]);
  useEffect(() => {
    if (audioRef.current) {
      audioRef.current.pause();
      audioRef.current.load();
      setIsPlaying(false);
      setCurrentTime(0);
    }
  }, [selectedVoice]);
  const togglePlay = () => {
    if (isPlaying) {
      audioRef.current.pause();
    } else {
      audioRef.current.play();
    }
    setIsPlaying(!isPlaying);
  };
  const handleProgressChange = e => {
    const newTime = parseFloat(e.target.value);
    audioRef.current.currentTime = newTime;
    setCurrentTime(newTime);
  };
  const formatTime = time => {
    if (isNaN(time)) return '0:00';
    const minutes = Math.floor(time / 60);
    const seconds = Math.floor(time % 60);
    return `${minutes}:${seconds.toString().padStart(2, '0')}`;
  };
  const currentVoice = voices[selectedVoice];
  return <div className="border rounded-lg bg-card border-gray-200 dark:border-gray-800">
      {}
      <div className="grid grid-cols-3 items-center px-3 py-1.5 bg-muted border-b border-gray-200 dark:border-gray-800">
        <span className="text-xs font-medium">Listen to Page</span>

        <span className="text-xs font-semibold text-muted-foreground text-center">Powered by Fish Audio S1</span>

        {voices.length > 1 ? <div className="relative justify-self-end" ref={dropdownRef}>
            <button onClick={() => setIsDropdownOpen(!isDropdownOpen)} className="flex items-center gap-1.5 px-3 py-1 rounded-full bg-muted hover:bg-gray-200 dark:hover:bg-gray-700 transition-all duration-200 cursor-pointer text-xs">
              <span className="text-muted-foreground">Voice:</span>
              <span className="font-medium">{voices[selectedVoice]?.name}</span>
              <svg className={`w-3 h-3 transition-transform duration-200 ${isDropdownOpen ? 'rotate-180' : ''}`} fill="none" stroke="currentColor" viewBox="0 0 24 24">
                <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 9l-7 7-7-7" />
              </svg>
            </button>

            {isDropdownOpen && <div className="absolute right-0 mt-1 w-auto bg-white dark:bg-black border border-gray-200 dark:border-gray-700 rounded-lg overflow-hidden z-50">
                {voices.map((voice, index) => <button key={index} onClick={() => {
    setSelectedVoice(index);
    setIsDropdownOpen(false);
  }} className={`w-full px-3 py-1.5 text-left text-xs hover:bg-gray-100 dark:hover:bg-gray-800 transition-colors flex items-center gap-2 ${index === selectedVoice ? 'bg-gray-100 dark:bg-gray-800 font-medium' : ''}`}>
                    {voice.id && <img src={`https://public-platform.r2.fish.audio/coverimage/${voice.id}`} alt={voice.name} className="w-5 h-5 rounded-full m-0 flex-shrink-0 object-cover" />}
                    <span className="flex-1 whitespace-nowrap">{voice.name}</span>
                  </button>)}
              </div>}
          </div> : <div className="justify-self-end" />}
      </div>

      {}
      <div className="px-3 py-1.5 bg-card">
        <audio ref={audioRef} src={currentVoice?.url} preload="metadata" />

        <div className="flex items-center gap-2">
          {}
          <button onClick={togglePlay} className="flex-shrink-0 w-6 h-6 flex items-center justify-center bg-gray-300 dark:bg-gray-600 text-gray-800 dark:text-gray-200 rounded-full hover:opacity-80 transition-opacity relative overflow-hidden" aria-label={isPlaying ? 'Pause' : 'Play'}>
            <div className="transition-transform duration-300 ease-in-out" style={{
    transform: isPlaying ? 'rotate(180deg)' : 'rotate(0deg)'
  }}>
              {isPlaying ? <svg className="w-3 h-3" fill="currentColor" viewBox="0 0 24 24">
                  <path d="M6 4h4v16H6V4zm8 0h4v16h-4V4z" />
                </svg> : <svg className="w-3 h-3 ml-0.5" fill="currentColor" viewBox="0 0 24 24">
                  <path d="M8 5v14l11-7z" />
                </svg>}
            </div>
          </button>

          {}
          <div className="flex-1 flex items-center gap-2">
            <span className="text-xs font-mono text-gray-500 dark:text-gray-400 min-w-[35px]">
              {formatTime(currentTime)}
            </span>

            <div className="flex-1 relative h-1 bg-gray-200 dark:bg-gray-700 rounded-full overflow-hidden">
              <div className="absolute top-0 left-0 h-full bg-gray-400 dark:bg-gray-500 transition-all duration-100" style={{
    width: `${duration ? currentTime / duration * 100 : 0}%`
  }} />
              <input type="range" min="0" max={duration || 0} value={currentTime} onChange={handleProgressChange} className="absolute top-0 left-0 w-full h-full opacity-0 cursor-pointer" />
            </div>
            <span className="text-xs font-mono text-gray-500 dark:text-gray-400 min-w-[35px]">
              {formatTime(duration)}
            </span>
          </div>
        </div>
      </div>
    </div>;
};

<AudioTranscript
  voices={[
{
  "id": "8ef4a238714b45718ce04243307c57a7",
  "name": "E-girl",
  "url": "https://pub-b995142090474379a930b856ab79b4d4.r2.dev/audio/javascript-text-to-speech/8ef4a238714b45718ce04243307c57a7.mp3"
},
{
  "id": "802e3bc2b27e49c2995d23ef70e6ac89",
  "name": "Energetic Male",
  "url": "https://pub-b995142090474379a930b856ab79b4d4.r2.dev/audio/javascript-text-to-speech/802e3bc2b27e49c2995d23ef70e6ac89.mp3"
},
{
  "id": "933563129e564b19a115bedd57b7406a",
  "name": "Sarah",
  "url": "https://pub-b995142090474379a930b856ab79b4d4.r2.dev/audio/javascript-text-to-speech/933563129e564b19a115bedd57b7406a.mp3"
},
{
  "id": "bf322df2096a46f18c579d0baa36f41d",
  "name": "Adrian",
  "url": "https://pub-b995142090474379a930b856ab79b4d4.r2.dev/audio/javascript-text-to-speech/bf322df2096a46f18c579d0baa36f41d.mp3"
},
{
  "id": "b347db033a6549378b48d00acb0d06cd",
  "name": "Selene",
  "url": "https://pub-b995142090474379a930b856ab79b4d4.r2.dev/audio/javascript-text-to-speech/b347db033a6549378b48d00acb0d06cd.mp3"
},
{
  "id": "536d3a5e000945adb7038665781a4aca",
  "name": "Ethan",
  "url": "https://pub-b995142090474379a930b856ab79b4d4.r2.dev/audio/javascript-text-to-speech/536d3a5e000945adb7038665781a4aca.mp3"
}
]}
/>

## Prerequisites

<AccordionGroup>
  <Accordion icon="user-plus" title="Create a Fish Audio account">
    Sign up for a free Fish Audio account to get started with our API.

    1. Go to [fish.audio/auth/signup](https://fish.audio/auth/signup)
    2. Fill in your details to create an account, then complete the steps to verify it.
    3. Log in to your account and navigate to the [API section](https://fish.audio/app/api-keys)
  </Accordion>

  <Accordion icon="key" title="Get your API key">
    Once you have an account, you'll need an API key to authenticate your requests.

    1. Log in to your [Fish Audio Dashboard](https://fish.audio/app/api-keys/)
    2. Navigate to the API Keys section
    3. Click "Create New Key", give it a descriptive name, and set an expiration if desired
    4. Copy your key and store it securely

    <Warning>Keep your API key secret! Never commit it to version control or share it publicly.</Warning>
  </Accordion>
</AccordionGroup>

## Basic Usage

Generate speech from text:

```typescript theme={null}
import { FishAudioClient, play } from "fish-audio";

const fishAudio = new FishAudioClient({ apiKey: process.env.FISH_API_KEY });

const audio = await fishAudio.textToSpeech.convert({
  text: "Hello, world!",
});

await play(audio);
```

## Using Voice Models

Specify a voice model for consistent voice generation:

```typescript theme={null}
// `play` is imported alongside the client because it is called below.
import { FishAudioClient, play } from "fish-audio";

const fishAudio = new FishAudioClient();

// `reference_id` selects the voice model used for generation.
const audio = await fishAudio.textToSpeech.convert({
  text: "This is my custom voice",
  reference_id: "your_model_id", // Your model ID from fish.audio
});

await play(audio);
```

### Getting Model IDs

The `reference_id` is the model ID from the URL when viewing a model on Fish Audio:

* Model URL: `https://fish.audio/m/802e3bc2b27e49c2995d23ef70e6ac89`
* Reference ID: `802e3bc2b27e49c2995d23ef70e6ac89`

You can also get model IDs programmatically:

```typescript theme={null}
// List your models
const results = await fishAudio.voices.search({ self: true });
for (const model of results.items ?? []) {
  console.log(`${model.title}: ${model._id}`);
}

// Get specific model details
const model = await fishAudio.voices.get("your_model_id");
console.log(`Model: ${model.title}, ID: ${model._id}`);
```

## Emotions

Add emotional expressions to your text:

```typescript theme={null}
import type { TTSRequest } from "fish-audio";

const text = `
(happy) I'm excited to share this!
(sad) Unfortunately, it didn't work out.
(whispering) This is a secret.
`;

const request: TTSRequest = { text, reference_id: "model_id" };
```

Common emotions: `(happy)`, `(sad)`, `(angry)`, `(excited)`, `(calm)`, `(surprised)`, `(whispering)`, `(shouting)`, `(laughing)`, `(sighing)`

For more advanced control over speech generation, including phoneme-level control and additional paralanguage features, see [Fine-grained Control](/developer-guide/core-features/fine-grained-control).

## Audio Formats

Choose output format based on your needs:

```typescript theme={null}
// MP3 (default)
await fishAudio.textToSpeech.convert({ text: "...", format: "mp3", mp3_bitrate: 192 });

// WAV - uncompressed
await fishAudio.textToSpeech.convert({ text: "...", format: "wav", sample_rate: 44100 });

// Opus - efficient for streaming
await fishAudio.textToSpeech.convert({ text: "...", format: "opus", opus_bitrate: 48 });

// PCM - raw audio data
await fishAudio.textToSpeech.convert({ text: "...", format: "pcm", sample_rate: 16000 });
```

## Prosody Control

Adjust speech speed and volume:

```typescript theme={null}
const audio = await fishAudio.textToSpeech.convert({
  text: "Adjusted speech",
  prosody: {
    speed: 1.2,  // 0.5 - 2.0
    volume: 5,   // -20 - 20
  },
});
```

## Advanced Parameters

Fine-tune generation:

```typescript theme={null}
// Uses the same `fishAudio` client instance as the other examples on this page.
const audio = await fishAudio.textToSpeech.convert({
  text: "Your text here",
  chunk_length: 200,    // Characters per chunk (100-300)
  normalize: true,      // Normalize text
  latency: "balanced",  // "normal" or "balanced"
  temperature: 0.7,     // Randomness (0.0-1.0)
  top_p: 0.7,           // Token selection (0.0-1.0)
});
```

## Choosing Backend

Our state-of-the-art [S1 model](/developer-guide/models-pricing/models-overview)
is the default backend model for TTS. Optionally specify the model via the second argument (`backend: Backends`).

```typescript theme={null}
const audio = await fishAudio.textToSpeech.convert({
  text: "Hello, world!",
}, "s1");
```

## Streaming

For real-time streaming, see the [WebSocket guide](/developer-guide/sdk-guide/javascript/websocket).

## Error Handling

Handle common errors:

```typescript theme={null}
/**
 * Converts text to speech, retrying with exponential backoff when the API
 * responds with HTTP 429.
 *
 * @param request - Request object passed unchanged to `textToSpeech.convert`.
 * @param maxRetries - Maximum number of attempts (default 3).
 * @returns The result of `textToSpeech.convert` from the first successful attempt.
 * @throws Error "Invalid API key" on HTTP 401; the last 429 error once all
 *   attempts are used up; any other error is rethrown immediately.
 */
async function generateWithRetry(request: Record<string, unknown>, maxRetries = 3) {
  const fishAudio = new FishAudioClient();
  let lastError: unknown;
  for (let attempt = 0; attempt < maxRetries; attempt++) {
    try {
      return await fishAudio.textToSpeech.convert(request);
    } catch (e: unknown) {
      // The status may sit on the error itself or on a nested `response`.
      const err = e as { status?: number; response?: { status?: number } } | null | undefined;
      const status = err?.status ?? err?.response?.status;
      if (status === 401) throw new Error("Invalid API key");
      if (status !== 429) throw e;
      lastError = e;
      // Back off 1s, 2s, 4s, ... but only when another attempt will follow.
      if (attempt < maxRetries - 1) {
        await new Promise(r => setTimeout(r, 2 ** attempt * 1000));
      }
    }
  }
  // Surface the failure instead of silently resolving to `undefined`.
  throw lastError ?? new Error("No attempts were made (maxRetries must be at least 1)");
}
```

## Request Parameters

| Parameter      | Type      | Description          | Default    |
| -------------- | --------- | -------------------- | ---------- |
| `text`         | string    | Text to convert      | Required   |
| `reference_id` | string    | Voice model ID       | None       |
| `references`   | object\[] | Reference audio      | \[]        |
| `format`       | string    | Audio format         | "mp3"      |
| `chunk_length` | number    | Chunk size (100-300) | 200        |
| `normalize`    | boolean   | Normalize text       | true       |
| `latency`      | string    | Speed vs quality     | "balanced" |
| `prosody`      | object    | Speed/volume         | None       |
| `temperature`  | number    | Randomness           | 0.7        |
| `top_p`        | number    | Token selection      | 0.7        |

## Next Steps

* [Fine-grained control](/developer-guide/core-features/fine-grained-control) for phoneme-level control and paralanguage
* [Voice cloning](/developer-guide/sdk-guide/javascript/voice-cloning) for custom voices
* [WebSocket streaming](/developer-guide/sdk-guide/javascript/websocket) for real-time apps
* [Guide and Best Practices](/developer-guide/core-features/text-to-speech) for production use
* [API reference](/api-reference/endpoint/openapi-v1/text-to-speech) for direct API calls
