admin管理员组文章数量:1279055
I'm using ElevenLabs Conversational AI API, which uses Websockets. And I'm using Unity C# on the client side.
I can receive audio just fine, but when I send my reply to it, I don't get a response.
After I send my audio, the server just keeps sending "ping" forever (to which I reply with the "pong").
What's strange is when I go to the ElevenLabs website, and check the call history and play it, it appears that I actually did send my audio, but the AI hangs up shortly as if I didn't say anything.
Just to make sure it isn't just AI being stupid, I talked with it directly on the site and it works perfectly.
Am I doing something wrong? I've tried changing websocket libraries, json parsers, and more, but nothing seems to work.
This is the doc I was following:
here's the code:
using UnityEngine;
using NativeWebSocket;
using System;
using System.Threading.Tasks;
using System.Net.Http;
using System.Collections;
class SendSound : MonoBehaviour
{
public AudioClip clip;
string userAudioSend;
AudioSource audioSource;
AudioClip latestResponse;
WebSocket websocket;
[System.Serializable]
struct MessageType
{
public string type;
};
[System.Serializable]
struct PingEvent
{
public int event_id;
}
[System.Serializable]
struct MessagePing
{
public MessageType type;
public PingEvent ping_event;
}
[System.Serializable]
struct SignedURL
{
public string signed_url;
}
[System.Serializable]
struct AudioEvent
{
public string audio_base_64;
public int event_id;
}
[System.Serializable]
struct AudioResponse
{
public string type;
public AudioEvent audio_event;
}
private async void Start()
{
audioSource = GetComponent<AudioSource>();
string signedUrlJson= await GetSignedUrl("MY_API_KEY", "AGENT_ID");
SignedURL signedUrl = JsonUtility.FromJson<SignedURL>(signedUrlJson);
websocket = new WebSocket(signedUrl.signed_url);
websocket.OnOpen += () =>
{
Debug.Log("Connection open!");
if(websocket.State != WebSocketState.Open)
{
Debug.LogError("Not open");
}
websocket.SendText("{ \"type\": \"conversation_initiation_client_data\" }");
};
websocket.OnError += (e) =>
{
Debug.Log("Error! " + e);
};
websocket.OnClose += (e) =>
{
Debug.Log("Connection closed!");
};
websocket.OnMessage += (bytes) =>
{
Debug.Log("OnMessage!");
var message = System.Text.Encoding.UTF8.GetString(bytes);
Debug.Log(message);
MessageType mt = JsonUtility.FromJson<MessageType>(message);
switch(mt.type)
{
case "ping":
MessagePing meping = JsonUtility.FromJson<MessagePing>(message);
Debug.Log("WE PONGED: " + "{\"type\":\"pong\",\"event_id\":" + meping.ping_event.event_id + " }");
websocket.SendText("{\"type\":\"pong\",\"event_id\":" + meping.ping_event.event_id + "}");
break;
case "audio":
AudioResponse audioResponse = JsonUtility.FromJson<AudioResponse>(message);
latestResponse = Base64ToAudioClip(audioResponse.audio_event.audio_base_64);
break;
case "interruption":
audioSource.Stop();
break;
}
};
userAudioSend = "{\"user_audio_chunk\":\"" + ConvertAudioClipToBase64(clip) + "\"}";
StartCoroutine(DispatchQueue());
StartCoroutine(UpdateSound());
Invoke(nameof(SendAudio), 3.5f);
await websocket.Connect();
}
async Task SendAudio()
{
if (websocket.State == WebSocketState.Open)
{
Debug.Log("WE SENT AUDIO");
await websocket.SendText(userAudioSend);
}
else
{
Debug.Log("NOT SEND: " + Enum.GetName(websocket.State.GetType(), websocket.State));
}
}
IEnumerator UpdateSound()
{
while (true)
{
if (audioSource.clip != latestResponse && !audioSource.isPlaying)
{
audioSource.clip = latestResponse;
audioSource.Play();
}
yield return null;
}
}
IEnumerator DispatchQueue()
{
while(true)
{
websocket.DispatchMessageQueue();
yield return null;
}
}
private async void OnApplicationQuit()
{
await websocket.Close();
}
public static string ConvertAudioClipToBase64(AudioClip clip)
{
if (clip == null) return null;
// 1. Resample to 16000Hz if needed
float[] samples = ResampleTo16000(clip);
// 2. Convert to 16-bit PCM bytes
byte[] pcmBytes = ConvertTo16BitPCM(samples);
// 3. Base64 encode (RAW PCM, NO HEADER)
return Convert.ToBase64String(pcmBytes);
}
// Resample audio to 16000Hz
private static float[] ResampleTo16000(AudioClip clip)
{
float[] original = new float[clip.samples * clip.channels];
clip.GetData(original, 0);
if (clip.frequency == 16000) return original;
float ratio = (float)clip.frequency / 16000f;
float[] resampled = new float[Mathf.RoundToInt(original.Length / ratio)];
for (int i = 0; i < resampled.Length; i++)
{
float srcIndex = i * ratio;
int prevIndex = Mathf.FloorToInt(srcIndex);
int nextIndex = Mathf.Min(prevIndex + 1, original.Length - 1);
float lerp = srcIndex - prevIndex;
resampled[i] = Mathf.Lerp(original[prevIndex], original[nextIndex], lerp);
}
return resampled;
}
// Convert float[-1,1] samples to 16-bit PCM
private static byte[] ConvertTo16BitPCM(float[] samples)
{
byte[] pcm = new byte[samples.Length * 2];
for (int i = 0; i < samples.Length; i++)
{
// Clamp and convert to 16-bit range
short value = (short)(Mathf.Clamp(samples[i], -1f, 1f) * short.MaxValue);
Buffer.BlockCopy(BitConverter.GetBytes(value), 0, pcm, i * 2, 2);
}
return pcm;
}
public static AudioClip Base64ToAudioClip(string base64String)
{
// Decode the Base64 string into a byte array
byte[] audioBytes = Convert.FromBase64String(base64String);
// Calculate the number of samples (16-bit, so 2 bytes per sample)
int sampleCount = audioBytes.Length / 2;
float[] samples = new float[sampleCount];
// Convert each 16-bit sample to a normalized float
for (int i = 0; i < sampleCount; i++)
{
int offset = i * 2;
short sampleInt16 = BitConverter.ToInt16(audioBytes, offset);
samples[i] = sampleInt16 / 32768.0f; // Normalize to [-1, 1]
}
// Create the AudioClip (mono, 16000 Hz)
AudioClip audioClip = AudioClip.Create(
"DecodedAudio",
sampleCount,
1, // Mono
16000,
false // Do not stream
);
// Set the sample data
audioClip.SetData(samples, 0);
return audioClip;
}
public static async Task<string> GetSignedUrl(string apiKey, string agentId)
{
var baseUrl = ";;
var url = $"{baseUrl}?agent_id={Uri.EscapeDataString(agentId)}";
Debug.Log("Final URL: " + url);
using var httpClient = new HttpClient();
var request = new HttpRequestMessage(HttpMethod.Get, url);
request.Headers.Add("xi-api-key", apiKey);
try
{
var response = await httpClient.SendAsync(request);
if (response.IsSuccessStatusCode)
{
return await response.Content.ReadAsStringAsync();
}
else
{
Debug.Log("WE ERORRED");
return $"Error: {response.StatusCode}";
}
}
catch (HttpRequestException ex)
{
return $"Request error: {ex.Message}";
}
}
}
本文标签: cElevenLabs Websocket server not sending responseStack Overflow
版权声明:本文标题:c# - ElevenLabs Websocket server not sending response - Stack Overflow 内容由网友自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:http://www.betaflare.com/web/1741272591a2369550.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。
发表评论