Official Python SDK for IndusLabs Voice API - providing seamless Text-to-Speech (TTS) and Speech-to-Text (STT) capabilities with both synchronous and asynchronous support.
Need an API Key? If you don't have an API key yet, you can create one here: https://playground.induslabs.io/register
Install the SDK using pip. Requires Python 3.7 or higher.
pip install induslabs
Initialize the client with your API key and start making requests immediately.
from induslabs import Client
# Pass the API key explicitly...
client = Client(api_key="your_api_key_here")

# ...or leave it out and supply it through the environment instead
# export INDUSLABS_API_KEY="your_api_key_here"
client = Client()

# Text-to-Speech: synthesize one sentence and write the audio to disk
response = client.tts.speak(text="Hello, this is a test", voice="Indus-hi-maya")
response.save("output.wav")

# Speech-to-Text: transcribe an audio file, then show the text and the
# language reported by the service
result = client.stt.transcribe("audio.wav", language="hi")
print(result.text)
print(f"Detected: {result.language_detected}")
Convert text to speech with simple method calls. The SDK handles all API communication and response parsing.
from induslabs import Client
client = Client(api_key="your_api_key")

# One call performs the synthesis
response = client.tts.speak(text="Hello, this is a test", voice="Indus-hi-maya")

# Write the audio to disk
response.save("output.wav")

# The response also carries metadata about the generated audio
print(f"Sample Rate: {response.sample_rate}Hz")
print(f"Channels: {response.channels}")
print(f"Format: {response.format}")
print(f"Request ID: {response.request_id}")
Enable streaming to receive audio chunks as they're generated, reducing latency for real-time applications.
"""
Streaming Text-to-Speech Example
This example demonstrates how to stream audio from the IndusLabs TTS API
and play it in real-time while simultaneously saving to a file.
Requirements:
pip install induslabs pyaudio
Note: PyAudio may require additional system dependencies:
- Ubuntu/Debian: sudo apt-get install portaudio19-dev python3-pyaudio
- macOS: brew install portaudio
- Windows: PyAudio wheels available on PyPI
"""
import queue
import threading
import time
import pyaudio
from induslabs import Client
class StreamingTTSPlayer:
    """Handles real-time streaming playback of TTS audio with buffering.

    A producer thread pulls audio chunks from a streaming TTS response into a
    queue (optionally mirroring them to a file) while a consumer thread writes
    the queued chunks to a PyAudio output stream.
    """

    def __init__(self, sample_rate=24000, channels=1, chunk_size=4096):
        """Create the player and its PyAudio instance.

        Args:
            sample_rate: Playback sample rate in Hz.
            channels: Number of output channels.
            chunk_size: Chunk size requested from ``response.iter_bytes`` and
                passed as ``frames_per_buffer`` when opening the output stream.
        """
        self.sample_rate = sample_rate
        self.channels = channels
        self.chunk_size = chunk_size
        self.audio_queue = queue.Queue()
        self.streaming_complete = False
        self.playing = False
        self.p = pyaudio.PyAudio()
        self.stream = None

    def _stream_audio(self, response, save_path=None):
        """Receives audio chunks from API and queues them for playback.

        Args:
            response: Object exposing ``iter_bytes(chunk_size=...)``.
            save_path: Optional path; when given, every chunk is also written
                to this file in binary mode.
        """
        file_handle = None
        try:
            # Open inside the try block: if the file cannot be opened,
            # streaming_complete is still set below, so play() does not wait
            # forever for audio that will never arrive.
            if save_path:
                file_handle = open(save_path, "wb")
            for chunk in response.iter_bytes(chunk_size=self.chunk_size):
                self.audio_queue.put(chunk)
                if file_handle:
                    file_handle.write(chunk)
        finally:
            self.streaming_complete = True
            if file_handle:
                file_handle.close()

    def _play_audio(self):
        """Plays audio chunks from the queue until the stream is drained."""
        while self.playing:
            try:
                chunk = self.audio_queue.get(timeout=0.05)
            except queue.Empty:
                # The producer enqueues chunks *before* raising the completion
                # flag, so re-check the queue after seeing the flag; otherwise
                # chunks added right after the timed-out get() would be lost.
                if self.streaming_complete and self.audio_queue.empty():
                    break
                continue
            if chunk is None:
                # None acts as an explicit end-of-stream sentinel
                break
            self.stream.write(chunk)

    def play(self, response, save_path=None, prebuffer_seconds=1.0):
        """
        Stream and play TTS audio in real-time

        Args:
            response: Streaming response from client.tts.speak()
            save_path: Optional path to save audio file
            prebuffer_seconds: Seconds of audio to buffer before playback starts
        """
        # Open audio output stream
        self.stream = self.p.open(
            format=pyaudio.paInt16,
            channels=self.channels,
            rate=self.sample_rate,
            output=True,
            frames_per_buffer=self.chunk_size
        )
        self.playing = True
        self.streaming_complete = False
        try:
            # Start streaming thread
            stream_thread = threading.Thread(
                target=self._stream_audio,
                args=(response, save_path),
                daemon=True
            )
            stream_thread.start()

            # Wait for initial buffer. The factor 2 is the byte width of a
            # 16-bit sample (the stream is opened as paInt16).
            chunks_needed = int((self.sample_rate * self.channels * 2 / self.chunk_size) * prebuffer_seconds)
            print(f"Buffering {prebuffer_seconds}s of audio...")
            while self.audio_queue.qsize() < chunks_needed:
                if self.streaming_complete:
                    # Short clips may finish before the buffer target is hit
                    break
                time.sleep(0.1)
            print("Playing audio...\n")

            # Start playback thread
            play_thread = threading.Thread(target=self._play_audio, daemon=True)
            play_thread.start()

            # Wait for completion
            stream_thread.join()
            play_thread.join()
        finally:
            # Cleanup runs on errors and interrupts too, so the output stream
            # is always released and the playback loop is told to stop.
            self.playing = False
            self.stream.stop_stream()
            self.stream.close()

    def close(self):
        """Release audio resources"""
        self.p.terminate()
def main():
    """Run the streaming demo: synthesize a paragraph, play it live, and save it."""
    banner = "=" * 60

    # With no arguments the client uses the INDUSLABS_API_KEY environment variable
    tts_client = Client()

    # Passage that will be converted to speech
    passage = """
Artificial intelligence is transforming the way we live and work.
From self-driving cars to personalized healthcare, AI is revolutionizing
various industries. Machine learning algorithms are becoming more advanced,
enabling systems to recognize patterns and make predictions with incredible accuracy.
"""

    print(banner)
    print("IndusLabs Streaming TTS Example")
    print(banner)

    # stream=True asks for a streaming response instead of a complete one
    stream_response = tts_client.tts.speak(
        text=passage,
        voice="Indus-hi-maya",
        stream=True,
    )

    audio_player = StreamingTTSPlayer()
    try:
        # Plays while chunks arrive and mirrors them into output.wav
        audio_player.play(stream_response, save_path="output.wav", prebuffer_seconds=1.0)
        print("Playback complete!")
        print("Audio saved to: output.wav")
    finally:
        # Release the audio backend whether or not playback succeeded
        audio_player.close()


if __name__ == "__main__":
    main()
Process audio in memory without saving to disk. Useful for temporary processing or immediate playback.
response = client.tts.speak(text="In-memory audio", voice="Indus-hi-maya")

# File-like view of the audio (BytesIO)
audio_file = response.to_file_object()

# The audio as raw bytes
audio_bytes = response.get_audio_data()

# Hand the file-like object to another library, here the standard wave module
import wave

with wave.open(audio_file, 'rb') as wav_reader:
    frame_count = wav_reader.getnframes()
    frames = wav_reader.readframes(frame_count)
Choose between WAV, MP3, or PCM formats based on your needs.
# WAV: the default format, best quality
wav_response = client.tts.speak(text="WAV format audio", voice="Indus-hi-maya", output_format="wav")

# MP3: smaller file size
mp3_response = client.tts.speak(text="MP3 format audio", voice="Indus-hi-maya", output_format="mp3")

# PCM: raw audio data
pcm_response = client.tts.speak(text="PCM format audio", voice="Indus-hi-maya", output_format="pcm")
Transcribe audio files with automatic language detection and detailed metrics.
from induslabs import Client
client = Client(api_key="your_api_key")

# Send the audio file for transcription
result = client.stt.transcribe("audio.wav", language="hi")

# The transcribed text
print(result.text)

# Metrics reported alongside the transcription
print(f"Detected: {result.language_detected}")
print(f"Duration: {result.audio_duration_seconds}s")
print(f"Processing: {result.processing_time_seconds}s")
print(f"Credits: {result.credits_used}")
print(f"Request ID: {result.request_id}")
Transcribe audio from memory without saving to disk first.
from io import BytesIO
# Option 1: pass an already-open binary file handle
with open("audio.wav", "rb") as audio_handle:
    result = client.stt.transcribe(audio_handle, language="hi")
    print(result.text)

# Option 2: wrap in-memory bytes in BytesIO
# (audio_bytes is audio you already hold in memory, e.g. from response.get_audio_data())
audio_buffer = BytesIO(audio_bytes)
result = client.stt.transcribe(audio_buffer, language="hi")
print(result.text)
Use async methods for better performance with concurrent requests and non-blocking operations.
import asyncio
from induslabs import Client
async def main():
    """Stream synthesized speech chunk by chunk, then transcribe a file, without blocking."""
    # The async context manager cleans the client up automatically on exit
    async with Client(api_key="your_api_key") as client:
        # Request a streaming synthesis
        tts_stream = await client.tts.speak_async(
            text="Async speech synthesis",
            voice="Indus-hi-maya",
            stream=True,
        )

        # Handle each chunk as it arrives; process_audio_chunk is a
        # placeholder for your own coroutine
        async for audio_chunk in tts_stream.iter_bytes():
            await process_audio_chunk(audio_chunk)

        # Transcribe a file through the async STT method
        transcription = await client.stt.transcribe_async("audio.wav", language="hi")
        print(transcription.text)


asyncio.run(main())
Close async clients when you are done: call await client.close() or use the async context manager.
Process multiple requests in parallel for better throughput.
from concurrent.futures import ThreadPoolExecutor
from induslabs import Client
client = Client(api_key="your_api_key")
texts = ["Text 1", "Text 2", "Text 3", "Text 4"]
def generate_speech(text):
    """Synthesize ``text`` with the Indus-hi-maya voice and return the TTS response."""
    tts_response = client.tts.speak(text=text, voice="Indus-hi-maya")
    return tts_response
# Fan the synthesis calls out over a pool of four worker threads;
# map() yields the results in the same order as the input texts
with ThreadPoolExecutor(max_workers=4) as pool:
    responses = [*pool.map(generate_speech, texts)]

# Write every result to its own numbered file
for index, audio in enumerate(responses):
    audio.save(f"output_{index}.wav")
import asyncio
from induslabs import Client
async def main():
    """Synthesize several texts concurrently and save each result to its own file."""
    client = Client(api_key="your_api_key")
    texts = ["Text 1", "Text 2", "Text 3", "Text 4"]
    try:
        # Create tasks
        tasks = [
            client.tts.speak_async(text=text, voice="Indus-hi-maya")
            for text in texts
        ]

        # Run concurrently; gather() returns results in the order of `tasks`
        responses = await asyncio.gather(*tasks)

        # Save all
        for i, response in enumerate(responses):
            response.save(f"output_{i}.wav")
    finally:
        # Close the client even when a request or a save fails, so the
        # underlying session is never left open
        await client.close()


asyncio.run(main())
List and filter available voices programmatically.
# List all voices
voices = client.voices.list()
# Iterate through voices
for voice in voices.voices:
print(f"{voice.name} ({voice.voice_id})")
print(f" Language: {voice.language}")
print(f" Gender: {voice.gender}")
# Filter by language
hindi_voices = voices.get_voices_by_language("hindi")
# Filter by gender
female_voices = voices.get_voices_by_gender("female")
# Get specific voice
voice = voices.get_voice_by_id("Indus-hi-Indus-hi-maya")
# Get all voice IDs
voice_ids = voices.list_voice_ids()
# Async version
voices = await client.voices.list_async()
Handle errors gracefully with comprehensive exception handling.
from induslabs import Client
import requests
client = Client(api_key="your_api_key")

try:
    response = client.tts.speak(text="Test audio", voice="Indus-hi-maya")
    response.save("output.wav")
except requests.exceptions.HTTPError as http_err:
    # The API answered with a 4xx or 5xx status
    print(f"HTTP error: {http_err}")
    print(f"Status: {http_err.response.status_code}")
    print(f"Response: {http_err.response.text}")
except ValueError as param_err:
    # A parameter was rejected as invalid
    print(f"Invalid parameter: {param_err}")
except FileNotFoundError as missing_err:
    # A file could not be found (relevant for STT)
    print(f"File not found: {missing_err}")
except Exception as other_err:
    # Anything not covered by the handlers above
    print(f"Error occurred: {other_err}")
Understanding the response objects returned by the SDK.
response = client.tts.speak(text="Hello", voice="Indus-hi-maya")
# Properties
response.content # bytes: Raw audio data
response.headers # dict: Response headers
response.request_id # str: Unique request identifier
response.sample_rate # int: Audio sample rate (default: 24000)
response.channels # int: Number of channels (default: 1)
response.bit_depth # int: Bit depth (default: 16)
response.format # str: Audio format (wav/mp3/pcm)
# Methods
response.save(filepath) # Save to file
response.get_audio_data() # Get raw bytes
response.to_file_object() # Get BytesIO object
response = client.tts.speak(text="Hello", voice="Indus-hi-maya", stream=True)
# Properties (same as TTSResponse)
response.sample_rate
response.channels
response.format
# Methods
for chunk in response.iter_bytes(chunk_size=8192):
# Process each chunk
pass
response.save(filepath) # Save streamed audio
response.to_file_object() # Convert to BytesIO
result = client.stt.transcribe("audio.wav", language="hi")
# Properties
result.text # str: Transcribed text
result.request_id # str: Request identifier
result.language_detected # str: Detected language
result.audio_duration_seconds # float: Audio duration
result.processing_time_seconds # float: Processing time
result.first_token_time_seconds # float: Time to first token
result.credits_used # float: Credits consumed
# Methods
result.to_dict() # Get raw response dict
str(result) # Returns result.text
voices = client.voices.list()
# Properties
voices.voices # List[Voice]: All available voices
voices.status_code # int: Response status
voices.message # str: Response message
# Methods
voices.get_voices_by_language("hindi") # Filter by language
voices.get_voices_by_gender("female") # Filter by gender
voices.get_voice_by_id("voice_id") # Get specific voice
voices.list_voice_ids() # List all IDs
voices.to_dict() # Raw response data
Configure the SDK using environment variables for better security.
# Set API key
export INDUSLABS_API_KEY="your_api_key_here"
# Now initialize without passing api_key
python -c "from induslabs import Client; client = Client()"
# Or in your .env file
INDUSLABS_API_KEY=your_api_key_here
import os
from induslabs import Client
# Load from environment
client = Client()
# Or load from .env file using python-dotenv
from dotenv import load_dotenv
load_dotenv()
client = Client() # Automatically uses INDUSLABS_API_KEY
# Error: API key must be provided
# Solution: Set API key via parameter or environment variable
export INDUSLABS_API_KEY="your_api_key"
# Or
client = Client(api_key="your_api_key")
# Error: No module named 'induslabs'
# Solution: Install the package
pip install induslabs
# Or upgrade to latest version
pip install --upgrade induslabs
# Warning: Unclosed client session
# Solution: Use context manager or explicit close
async with Client(api_key="key") as client:
# Your code here
pass # Auto cleanup
# Or
client = Client(api_key="key")
try:
# Your code here
pass
finally:
await client.close()
# Error: ValueError: output_format must be 'wav', 'mp3', or 'pcm'
# Solution: Use valid format
response = client.tts.speak(
text="Test",
voice="Indus-hi-maya",
output_format="wav" # Must be: wav, mp3, or pcm
)