Web Integration Demo
<!DOCTYPE html>
<html lang="zh-CN">
<head>
  <meta charset="UTF-8">
  <title>TTS Test</title>
</head>
<body>
  <h1>TTS Test Page</h1>
  <textarea id="textInput" rows="4" cols="50">真正的成长,是学会接受自己的不完美。</textarea><br>
  <button onclick="sendText()">Send Text</button>
  <script>
    class PCMAudioPlayer {
      constructor(sampleRate) {
        this.sampleRate = sampleRate;
        this.audioContext = null;
        this.audioQueue = [];
        this.isPlaying = false;
        this.currentSource = null;
        const bufferThreshold = 2;
      }
      connect() {
        if (!this.audioContext) {
          this.audioContext = new (window.AudioContext || window.webkitAudioContext)();
        }
      }
      pushPCM(arrayBuffer) {
        this.audioQueue.push(arrayBuffer);
        this._playNextAudio();
      }
      /**
       * Convert an ArrayBuffer of raw PCM data into an AudioBuffer
       */
      _bufferPCMData(pcmData) {
        const sampleRate = this.sampleRate; // must match the sample rate of the PCM data
        const length = pcmData.byteLength / 2; // PCM data is assumed to be 16-bit, so 2 bytes per sample
        const audioBuffer = this.audioContext.createBuffer(1, length, sampleRate);
        const channelData = audioBuffer.getChannelData(0);
        const int16Array = new Int16Array(pcmData); // view the PCM data as an Int16Array
        for (let i = 0; i < length; i++) {
          // convert 16-bit PCM to floats in the range -1.0 to 1.0
          channelData[i] = int16Array[i] / 32768;
        }
        let audioLength = length / sampleRate * 1000;
        console.log(`prepare audio: ${length} samples, ${audioLength} ms`)
        return audioBuffer;
      }
      async _playAudio(arrayBuffer) {
        if (this.audioContext.state === 'suspended') {
          await this.audioContext.resume();
        }
        const audioBuffer = this._bufferPCMData(arrayBuffer);
        this.currentSource = this.audioContext.createBufferSource();
        this.currentSource.buffer = audioBuffer;
        this.currentSource.connect(this.audioContext.destination);
        this.currentSource.onended = () => {
          console.log('Audio playback ended.');
          this.isPlaying = false;
          this.currentSource = null;
          this._playNextAudio(); // play the next audio in the queue
        };
        this.currentSource.start();
        this.isPlaying = true;
      }
      _playNextAudio() {
        if (this.audioQueue.length > 0 && !this.isPlaying) {
          // compute the total byte length of everything queued so far
          const totalLength = this.audioQueue.reduce((acc, buffer) => acc + buffer.byteLength, 0);
          const combinedBuffer = new Uint8Array(totalLength);
          let offset = 0;
          // concatenate every buffer in audioQueue into a single Uint8Array
          for (const buffer of this.audioQueue) {
            combinedBuffer.set(new Uint8Array(buffer), offset);
            offset += buffer.byteLength;
          }
          // clear audioQueue now that all of its data has been merged
          this.audioQueue = [];
          // hand the merged audio data to _playAudio
          this._playAudio(combinedBuffer.buffer);
        }
      }
      stop() {
        if (this.currentSource) {
          this.currentSource.stop(); // stop the audio that is currently playing
          this.currentSource = null; // drop the reference to the source node
          this.isPlaying = false; // update playback state
        }
        this.audioQueue = []; // clear the audio queue
        console.log('Playback stopped and queue cleared.');
      }
    }
    let player = new PCMAudioPlayer(24000);
    player.connect()
    player.stop()
    // Build the WebSocket URL according to the actual API documentation and substitute your own access token
    const socket = new WebSocket('wss://ws.coze.cn/v1/audio/speech?authorization=Bearer YOUR_ACCESS_TOKEN');
    socket.onmessage = function (event) {
      try {
        const message = JSON.parse(event.data);
        if (message.event_type === 'speech.audio.update') {
          const audioData = atob(message.data.delta); // the audio delta arrives as base64-encoded PCM
          console.log('audioData type ', typeof audioData);
          const arrayBuffer = Uint8Array.from(audioData, c => c.charCodeAt(0)).buffer;
          player.pushPCM(arrayBuffer)
        }
      } catch (error) {
        console.error('Failed to parse message:', error);
      }
    };
    function sendText() {
      const textInput = document.getElementById('textInput').value;
      if (textInput) {
        // send the text to the WebSocket server
        let append = {
          "id": "event_id",
          "event_type": "input_text_buffer.append",
          "data": {
            "delta": textInput
          }
        }
        socket.send(JSON.stringify(append));
        let submitData = {
          "id": "event_id",
          "event_type": "input_text_buffer.complete"
        }
        socket.send(JSON.stringify(submitData));
      } else {
        alert('Please enter the text to convert to speech');
      }
    }
  </script>
</body>
</html>
PCMAudioPlayer
The PCMAudioPlayer source used in the demo above comes from the Alibaba Cloud TTS documentation. I couldn't find a Coze demo showing how to play the returned audio, so I turned to Alibaba Cloud, whose documentation tends to be better, and it did indeed have one.
Below is the same class with some comments I added with the help of an AI model to make it easier to follow:
class PCMAudioPlayer {
  constructor(sampleRate) {
    this.sampleRate = sampleRate; // audio sample rate in Hz; must match the actual sample rate of the PCM data
    this.audioContext = null; // Web Audio API context instance
    this.audioQueue = []; // queue of PCM buffers waiting to be played
    this.isPlaying = false; // whether audio is currently playing
    this.currentSource = null; // the source node currently being played
    const bufferThreshold = 2; // unused buffering threshold (no logic implemented for it)
  }
  // Initialize the Web Audio context if it does not exist yet
  connect() {
    if (!this.audioContext) {
      // create the audio context, falling back to the legacy webkit prefix
      this.audioContext = new (window.AudioContext || window.webkitAudioContext)();
    }
  }
  // Push PCM data onto the queue and attempt to play it
  pushPCM(arrayBuffer) {
    this.audioQueue.push(arrayBuffer);
    this._playNextAudio(); // trigger the playback logic
  }
  /**
   * Convert 16-bit signed PCM data into a Web Audio-compatible AudioBuffer
   * @param {ArrayBuffer} pcmData - raw 16-bit PCM data
   * @returns {AudioBuffer} - normalized audio buffer
   */
  _bufferPCMData(pcmData) {
    const sampleRate = this.sampleRate;
    const length = pcmData.byteLength / 2; // number of samples (16 bits = 2 bytes per sample)
    const audioBuffer = this.audioContext.createBuffer(1, length, sampleRate); // create a mono buffer
    const channelData = audioBuffer.getChannelData(0);
    const int16Array = new Int16Array(pcmData);
    // normalize 16-bit signed integers (-32768..32767) to floats (-1.0..1.0)
    for (let i = 0; i < length; i++) {
      channelData[i] = int16Array[i] / 32768; // 32768 = 2^15, the magnitude of the most negative 16-bit value
    }
    console.log(`prepare audio: ${length} samples, ${length / sampleRate * 1000} ms`);
    return audioBuffer;
  }
  // Play a single audio buffer
  async _playAudio(arrayBuffer) {
    if (this.audioContext.state === 'suspended') {
      await this.audioContext.resume(); // resume a suspended audio context
    }
    const audioBuffer = this._bufferPCMData(arrayBuffer);
    this.currentSource = this.audioContext.createBufferSource();
    this.currentSource.buffer = audioBuffer;
    this.currentSource.connect(this.audioContext.destination); // connect to the output device
    // handle the end-of-playback event
    this.currentSource.onended = () => {
      console.log('Audio playback ended.');
      this.isPlaying = false;
      this.currentSource = null;
      this._playNextAudio(); // play the next buffered chunk
    };
    this.currentSource.start(); // start playback
    this.isPlaying = true;
  }
  // Drive playback of the audio queue
  _playNextAudio() {
    if (this.audioQueue.length > 0 && !this.isPlaying) {
      // merge every buffer in the queue (may hurt latency; acceptable for non-streaming use)
      const totalLength = this.audioQueue.reduce((acc, buf) => acc + buf.byteLength, 0);
      const combinedBuffer = new Uint8Array(totalLength);
      let offset = 0;
      this.audioQueue.forEach(buffer => {
        combinedBuffer.set(new Uint8Array(buffer), offset);
        offset += buffer.byteLength;
      });
      this.audioQueue = []; // clear the queue
      this._playAudio(combinedBuffer.buffer); // play the merged data
    }
  }
  // Stop playback immediately and clear the queue
  stop() {
    if (this.currentSource) {
      this.currentSource.stop(); // abort the current source node
      this.currentSource = null;
      this.isPlaying = false;
    }
    this.audioQueue = [];
    console.log('Playback stopped and queue cleared.');
  }
}
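To try PCMAudioPlayer on its own, without the Coze WebSocket API, you can feed it locally generated 16-bit PCM. The snippet below is a minimal sketch: the variable name testPlayer, the 440 Hz tone, and the one-second duration are arbitrary illustration values, not part of the original demo.
// Minimal local test of PCMAudioPlayer: play a generated 440 Hz tone for one second.
// Assumes the class above is already defined on the page; all values here are test parameters.
const testPlayer = new PCMAudioPlayer(24000); // must match the sample rate of the generated data
testPlayer.connect();

const sampleRate = 24000;
const samples = new Int16Array(sampleRate); // one second of mono audio
for (let i = 0; i < samples.length; i++) {
  // scale the sine wave into the 16-bit signed range, mirroring the /32768 decode in _bufferPCMData
  samples[i] = Math.round(Math.sin(2 * Math.PI * 440 * i / sampleRate) * 32767);
}
// pushPCM() expects an ArrayBuffer of raw 16-bit PCM, just like the decoded speech.audio.update deltas
testPlayer.pushPCM(samples.buffer);
Note that browsers keep a freshly created AudioContext suspended until a user gesture, so in practice this code should run from something like a button click handler, just as the demo sends text from its button.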
PCM in Depth
For background, see the article 音频基础知识及PCM技术详解 (Audio Basics and PCM Technology Explained).
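As a quick worked example of the format the player consumes (illustrative numbers, not taken from the referenced article): at the demo's 24000 Hz mono sample rate, 16-bit PCM amounts to 24000 × 2 = 48000 bytes of audio per second, and each sample round-trips between float and integer form like this:
// Round-trip between a float sample and its 16-bit PCM representation,
// matching the int16Array[i] / 32768 conversion used in _bufferPCMData (illustrative values only)
const floatSample = 0.5; // amplitude in the -1.0 .. 1.0 range
const pcm16 = Math.max(-32768, Math.min(32767, Math.round(floatSample * 32767))); // -> 16384
const backToFloat = pcm16 / 32768; // -> 0.5 (other values can differ by a tiny rounding error)
console.log(pcm16, backToFloat);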