I am using instant mode with the TTS streaming API. I expected the first audio chunk to arrive within about 200 ms, but it is taking about 2 seconds, which is much higher than expected. Please find my implementation below:
async def stream_tts(self, message_text):
    """Stream Hume TTS audio for ``message_text`` and log each decoded snippet.

    One request is sent to ``client.tts.synthesize_json_streaming`` with a
    single utterance that uses a fixed custom voice. Each streamed snippet's
    base64 ``audio`` field is decoded and its size in bytes is logged. The
    elapsed time until the first snippet arrives is logged as well, so
    time-to-first-audio can be measured directly.

    Args:
        message_text: Text passed as the utterance's ``text``.

    Returns:
        None. The decoded audio is only measured and logged, not returned.
    """
    import asyncio
    import time

    # Building a new client on every call means no connection can be reused
    # between requests. Keep one client per event loop on the instance instead.
    # NOTE(review): assumes the SDK client owns an HTTP connection pool, so a
    # reused client skips connection setup on later calls - confirm.
    # The cache is keyed on the running loop so that a client created under
    # one event loop is never reused under another.
    loop = asyncio.get_running_loop()
    cached = getattr(self, "_hume_tts_client", None)
    if cached is None or cached[0] is not loop:
        cached = (loop, AsyncHumeClient(api_key=HUME_API_KEY))
        self._hume_tts_client = cached
    client = cached[1]

    voice_id = "87c85b13-9dab-4b4d-bdd1-70182301d671"
    utterances = [
        PostedUtterance(
            text=message_text,
            speed=1,
            voice=PostedUtteranceVoiceWithId(id=voice_id, provider="CUSTOM_VOICE"),
        ),
    ]
    logging.info("Sending TTS request with voice ID: %s", voice_id)

    request_started = time.perf_counter()
    awaiting_first_snippet = True
    # NOTE(review): split_utterances=False may make the service hold audio
    # back until a whole utterance is ready - confirm against the Hume API
    # reference whether this affects time-to-first-chunk in instant mode.
    async for snippet in client.tts.synthesize_json_streaming(
        utterances=utterances,
        num_generations=1,
        split_utterances=False,
        instant_mode=True,
        format=FormatWav(),
    ):
        if awaiting_first_snippet:
            awaiting_first_snippet = False
            logging.info(
                "Time to first TTS snippet: %.0f ms",
                (time.perf_counter() - request_started) * 1000.0,
            )
        audio_data = base64.b64decode(snippet.audio)
        logging.info("Received raw wav snippet of size %d bytes", len(audio_data))
Can you please help me reduce the latency of this implementation?