Web Integration Demo
<!DOCTYPE html>
<html lang="zh-CN">
<head>
  <meta charset="UTF-8">
  <title>TTS Test</title>
</head>
<body>
  <h1>TTS Test Page</h1>
  <textarea id="textInput" rows="4" cols="50">真正的成长,是学会接受自己的不完美。</textarea><br>
  <button onclick="sendText()">Send Text</button>
  <script>
    class PCMAudioPlayer {
      constructor(sampleRate) {
        this.sampleRate = sampleRate;
        this.audioContext = null;
        this.audioQueue = [];
        this.isPlaying = false;
        this.currentSource = null;
        const bufferThreshold = 2;
      }
      connect() {
        if (!this.audioContext) {
          this.audioContext = new (window.AudioContext || window.webkitAudioContext)();
        }
      }
      pushPCM(arrayBuffer) {
        this.audioQueue.push(arrayBuffer);
        this._playNextAudio();
      }
      /**
       * Convert an ArrayBuffer of raw PCM data into an AudioBuffer
       */
      _bufferPCMData(pcmData) {
        const sampleRate = this.sampleRate; // must match the sample rate of the PCM data
        const length = pcmData.byteLength / 2; // PCM data is assumed to be 16-bit, so 2 bytes per sample
        const audioBuffer = this.audioContext.createBuffer(1, length, sampleRate);
        const channelData = audioBuffer.getChannelData(0);
        const int16Array = new Int16Array(pcmData); // view the PCM data as an Int16Array
        for (let i = 0; i < length; i++) {
          // convert 16-bit PCM to floats in the range -1.0 to 1.0
          channelData[i] = int16Array[i] / 32768;
        }
        let audioLength = length / sampleRate * 1000;
        console.log(`prepare audio: ${length} samples, ${audioLength} ms`)
        return audioBuffer;
      }
      async _playAudio(arrayBuffer) {
        if (this.audioContext.state === 'suspended') {
          await this.audioContext.resume();
        }
        const audioBuffer = this._bufferPCMData(arrayBuffer);
        this.currentSource = this.audioContext.createBufferSource();
        this.currentSource.buffer = audioBuffer;
        this.currentSource.connect(this.audioContext.destination);
        this.currentSource.onended = () => {
          console.log('Audio playback ended.');
          this.isPlaying = false;
          this.currentSource = null;
          this._playNextAudio(); // play the next audio in the queue
        };
        this.currentSource.start();
        this.isPlaying = true;
      }
      _playNextAudio() {
        if (this.audioQueue.length > 0 && !this.isPlaying) {
          // compute the total byte length of everything queued so far
          const totalLength = this.audioQueue.reduce((acc, buffer) => acc + buffer.byteLength, 0);
          const combinedBuffer = new Uint8Array(totalLength);
          let offset = 0;
          // concatenate every buffer in audioQueue into a single Uint8Array
          for (const buffer of this.audioQueue) {
            combinedBuffer.set(new Uint8Array(buffer), offset);
            offset += buffer.byteLength;
          }
          // clear audioQueue now that all of its data has been merged
          this.audioQueue = [];
          // hand the merged audio data to _playAudio
          this._playAudio(combinedBuffer.buffer);
        }
      }
      stop() {
        if (this.currentSource) {
          this.currentSource.stop(); // stop the audio that is currently playing
          this.currentSource = null; // drop the reference to the source node
          this.isPlaying = false; // update playback state
        }
        this.audioQueue = []; // clear the audio queue
        console.log('Playback stopped and queue cleared.');
      }
    }
    let player = new PCMAudioPlayer(24000);
    player.connect()
    player.stop()
    // Build the WebSocket URL according to the actual API documentation and substitute your own access token
    const socket = new WebSocket('wss://ws.coze.cn/v1/audio/speech?authorization=Bearer YOUR_ACCESS_TOKEN');
    socket.onmessage = function (event) {
      try {
        const message = JSON.parse(event.data);
        if (message.event_type === 'speech.audio.update') {
          const audioData = atob(message.data.delta); // the audio delta arrives as base64-encoded PCM
          console.log('audioData type ', typeof audioData);
          const arrayBuffer = Uint8Array.from(audioData, c => c.charCodeAt(0)).buffer;
          player.pushPCM(arrayBuffer)
        }
      } catch (error) {
        console.error('Failed to parse message:', error);
      }
    };
    function sendText() {
      const textInput = document.getElementById('textInput').value;
      if (textInput) {
        // send the text to the WebSocket server
        let append = {
          "id": "event_id",
          "event_type": "input_text_buffer.append",
          "data": {
            "delta": textInput
          }
        }
        socket.send(JSON.stringify(append));
        let submitData = {
          "id": "event_id",
          "event_type": "input_text_buffer.complete"
        }
        socket.send(JSON.stringify(submitData));
      } else {
        alert('Please enter the text to convert to speech');
      }
    }
  </script>
</body>
</html>
PCMAudioPlayer
The PCMAudioPlayer source used in the demo above comes from the Alibaba Cloud TTS documentation. I couldn't find a Coze demo showing how to play the returned audio, so I turned to Alibaba Cloud, whose documentation tends to be better, and it did indeed have one.
Below is the same class with some comments I added with the help of an AI model to make it easier to follow:
class PCMAudioPlayer {
  constructor(sampleRate) {
    this.sampleRate = sampleRate; // audio sample rate in Hz; must match the actual sample rate of the PCM data
    this.audioContext = null; // Web Audio API context instance
    this.audioQueue = []; // queue of PCM buffers waiting to be played
    this.isPlaying = false; // whether audio is currently playing
    this.currentSource = null; // the source node currently being played
    const bufferThreshold = 2; // unused buffering threshold (no logic implemented for it)
  }
  // Initialize the Web Audio context if it does not exist yet
  connect() {
    if (!this.audioContext) {
      // create the audio context, falling back to the legacy webkit prefix
      this.audioContext = new (window.AudioContext || window.webkitAudioContext)();
    }
  }
  // Push PCM data onto the queue and attempt to play it
  pushPCM(arrayBuffer) {
    this.audioQueue.push(arrayBuffer);
    this._playNextAudio(); // trigger the playback logic
  }
  /**
   * Convert 16-bit signed PCM data into a Web Audio-compatible AudioBuffer
   * @param {ArrayBuffer} pcmData - raw 16-bit PCM data
   * @returns {AudioBuffer} - normalized audio buffer
   */
  _bufferPCMData(pcmData) {
    const sampleRate = this.sampleRate;
    const length = pcmData.byteLength / 2; // number of samples (16 bits = 2 bytes per sample)
    const audioBuffer = this.audioContext.createBuffer(1, length, sampleRate); // create a mono buffer
    const channelData = audioBuffer.getChannelData(0);
    const int16Array = new Int16Array(pcmData);
    // normalize 16-bit signed integers (-32768..32767) to floats (-1.0..1.0)
    for (let i = 0; i < length; i++) {
      channelData[i] = int16Array[i] / 32768; // 32768 = 2^15, the magnitude of the most negative 16-bit value
    }
    console.log(`prepare audio: ${length} samples, ${length / sampleRate * 1000} ms`);
    return audioBuffer;
  }
  // Play a single audio buffer
  async _playAudio(arrayBuffer) {
    if (this.audioContext.state === 'suspended') {
      await this.audioContext.resume(); // resume a suspended audio context
    }
    const audioBuffer = this._bufferPCMData(arrayBuffer);
    this.currentSource = this.audioContext.createBufferSource();
    this.currentSource.buffer = audioBuffer;
    this.currentSource.connect(this.audioContext.destination); // connect to the output device
    // handle the end-of-playback event
    this.currentSource.onended = () => {
      console.log('Audio playback ended.');
      this.isPlaying = false;
      this.currentSource = null;
      this._playNextAudio(); // play the next buffered chunk
    };
    this.currentSource.start(); // start playback
    this.isPlaying = true;
  }
  // Drive playback of the audio queue
  _playNextAudio() {
    if (this.audioQueue.length > 0 && !this.isPlaying) {
      // merge every buffer in the queue (may hurt latency; acceptable for non-streaming use)
      const totalLength = this.audioQueue.reduce((acc, buf) => acc + buf.byteLength, 0);
      const combinedBuffer = new Uint8Array(totalLength);
      let offset = 0;
      this.audioQueue.forEach(buffer => {
        combinedBuffer.set(new Uint8Array(buffer), offset);
        offset += buffer.byteLength;
      });
      this.audioQueue = []; // clear the queue
      this._playAudio(combinedBuffer.buffer); // play the merged data
    }
  }
  // Stop playback immediately and clear the queue
  stop() {
    if (this.currentSource) {
      this.currentSource.stop(); // abort the current source node
      this.currentSource = null;
      this.isPlaying = false;
    }
    this.audioQueue = [];
    console.log('Playback stopped and queue cleared.');
  }
}
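To try PCMAudioPlayer on its own, without the Coze WebSocket API, you can feed it locally generated 16-bit PCM. The snippet below is a minimal sketch: the variable name testPlayer, the 440 Hz tone, and the one-second duration are arbitrary illustration values, not part of the original demo.
// Minimal local test of PCMAudioPlayer: play a generated 440 Hz tone for one second.
// Assumes the class above is already defined on the page; all values here are test parameters.
const testPlayer = new PCMAudioPlayer(24000); // must match the sample rate of the generated data
testPlayer.connect();

const sampleRate = 24000;
const samples = new Int16Array(sampleRate); // one second of mono audio
for (let i = 0; i < samples.length; i++) {
  // scale the sine wave into the 16-bit signed range, mirroring the /32768 decode in _bufferPCMData
  samples[i] = Math.round(Math.sin(2 * Math.PI * 440 * i / sampleRate) * 32767);
}
// pushPCM() expects an ArrayBuffer of raw 16-bit PCM, just like the decoded speech.audio.update deltas
testPlayer.pushPCM(samples.buffer);
Note that browsers keep a freshly created AudioContext suspended until a user gesture, so in practice this code should run from something like a button click handler, just as the demo sends text from its button.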
PCM in Depth
For background, see the article 音频基础知识及PCM技术详解 (Audio Basics and PCM Technology Explained).
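As a quick worked example of the format the player consumes (illustrative numbers, not taken from the referenced article): at the demo's 24000 Hz mono sample rate, 16-bit PCM amounts to 24000 × 2 = 48000 bytes of audio per second, and each sample round-trips between float and integer form like this:
// Round-trip between a float sample and its 16-bit PCM representation,
// matching the int16Array[i] / 32768 conversion used in _bufferPCMData (illustrative values only)
const floatSample = 0.5; // amplitude in the -1.0 .. 1.0 range
const pcm16 = Math.max(-32768, Math.min(32767, Math.round(floatSample * 32767))); // -> 16384
const backToFloat = pcm16 / 32768; // -> 0.5 (other values can differ by a tiny rounding error)
console.log(pcm16, backToFloat);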