admin管理员组

文章数量:1344241

I'm running into two bugs with Speech Synthesis in Google Chrome version 135.0.7049.42 on macOS 15.1.1 (24B91). The first is that when using the asynchronous/online voices provided by Google (e.g. Google US English), no events such as start or end are fired. However, the native macOS voices (e.g. Samantha) do emit these events as expected.

My second issue is that the first time speak() is called after starting Chrome, no audio is played. After a refresh or a second attempt, speak() suddenly works. I've also noticed that if I wait for a while before calling speak() again, it stops working again until another refresh or another attempt.

It is also interesting to note that even after the Google voice has stopped speaking, the utterance never really seems to finish: if you try to start another one, the page reports that speech is already in progress.

I've created a small demo file to test this out. Simply create a new HTML file, paste in the code, and open it with Chrome (to reproduce, make sure Chrome has just been started, i.e. quit and reopen it first).

<!DOCTYPE html>
<html lang="en">
   <head>
      <!-- Page metadata: UTF-8 encoding, a device-width viewport, and the tab title -->
      <meta charset="UTF-8" />
      <meta name="viewport" content="width=device-width, initial-scale=1.0" />
      <title>SpeechSynthesis API Test</title>
      <!-- Minimal inline styling for the demo page; no external stylesheet is used -->
      <style>
         /* Keep the content in a horizontally centered column of at most 800px */
         body {
            font-family: Arial, sans-serif;
            max-width: 800px;
            margin: 0 auto;
            padding: 20px;
         }
         /* Give every button some inner padding and vertical spacing */
         button {
            padding: 10px;
            margin: 10px 0;
         }
         /* The select spans the full column width */
         select {
            padding: 5px;
            margin-bottom: 15px;
            width: 100%;
         }
         /* The textarea spans the full column width with a fixed 100px height */
         textarea {
            width: 100%;
            height: 100px;
            padding: 10px;
            margin-bottom: 15px;
         }
      </style>
   </head>
   <body>
      <h1>SpeechSynthesis API Test</h1>

      <!-- Text input: the script looks this textarea up by id "text" and speaks its value -->
      <div>
         <label for="text">Text to speak:</label>
         <textarea id="text">Hello! This is a test of the SpeechSynthesis API.</textarea>
      </div>

      <!-- Voice picker: the placeholder option below is replaced by the script when it fills in the voice list -->
      <div>
         <label for="voice-select">Select Voice:</label>
         <select id="voice-select">
            <option value="">Loading voices...</option>
         </select>
      </div>

      <!-- Playback controls: the script attaches a click listener to each button by id -->
      <div>
         <button id="speak-btn">Speak</button>
         <button id="pause-btn">Pause</button>
         <button id="resume-btn">Resume</button>
         <button id="cancel-btn">Cancel</button>
      </div>

      <!-- Status line: starts empty; the script writes progress and error messages into it -->
      <div id="status"></div>

      <script>
         // Check if browser supports speech synthesis
         if ("speechSynthesis" in window) {
            // Speech synthesis controller exposed by the browser
            const synth = window.speechSynthesis;

            // Resolve each page control once, by id, so the handlers below can share them
            const byId = (id) => document.getElementById(id);
            const textInput = byId("text");
            const voiceSelect = byId("voice-select");
            const speakBtn = byId("speak-btn");
            const pauseBtn = byId("pause-btn");
            const resumeBtn = byId("resume-btn");
            const cancelBtn = byId("cancel-btn");
            const statusEl = byId("status");

            // Voice list most recently returned by synth.getVoices(); starts out empty
            let voices = [];

            // Rebuild the voice dropdown from the voices the synthesizer currently reports
            function populateVoiceList() {
               voices = synth.getVoices();

               // No voices reported: show a single placeholder entry and stop here.
               if (voices.length === 0) {
                  voiceSelect.innerHTML = '<option value="">No voices available</option>';
                  return;
               }

               // Discard the previous entries before adding the fresh ones.
               voiceSelect.innerHTML = "";

               for (const [position, voice] of voices.entries()) {
                  const entry = document.createElement("option");
                  // The option value is the voice's index in `voices`.
                  entry.value = position;
                  entry.textContent = `${voice.name} (${voice.lang})`;
                  // Preselect the voice flagged as the default.
                  if (voice.default) {
                     entry.selected = true;
                  }
                  voiceSelect.appendChild(entry);
               }

               statusEl.textContent = `Loaded ${voices.length} voices.`;
            }

            // Fill the dropdown immediately with whatever voices are available right now
            populateVoiceList();

            // Chrome loads voices asynchronously, so rebuild the dropdown whenever
            // the synthesizer signals that its voice list changed
            if (typeof synth.onvoiceschanged !== "undefined") {
               synth.onvoiceschanged = populateVoiceList;
            }

            // Speak the textarea contents using the voice picked in the dropdown
            function speak() {
               // Write a message into the status area of the page.
               const report = (message) => {
                  statusEl.textContent = message;
               };
               // Local time string used to timestamp console output.
               const now = () => new Date().toLocaleTimeString();

               // Do not start a new utterance while the synthesizer reports one in progress.
               if (synth.speaking) {
                  report("Speech synthesis already in progress");
                  return;
               }

               const phrase = textInput.value;
               if (!phrase) {
                  report("Please enter text to speak");
                  return;
               }

               const utterance = new SpeechSynthesisUtterance(phrase);

               // Apply a voice only when a voice list exists and an entry is selected;
               // the dropdown value is the index into `voices`.
               const hasChosenVoice = voices.length > 0 && voiceSelect.value !== "";
               if (hasChosenVoice) {
                  utterance.voice = voices[voiceSelect.value];
               }

               // Lifecycle callbacks: log each event with a timestamp and mirror it in the status area.
               utterance.onstart = () => {
                  console.log("Speech started at:", now());
                  report("Speaking...");
               };

               utterance.onend = () => {
                  console.log("Speech ended at:", now());
                  report("Speech synthesis finished");
               };

               utterance.onerror = ({ error }) => {
                  console.log("Speech error at:", now(), "Error:", error);
                  report("Error occurred: " + error);
               };

               synth.speak(utterance);
            }

            // Wire the four control buttons to the synthesizer
            speakBtn.addEventListener("click", speak);

            // Pause only applies while the synthesizer reports that it is speaking.
            pauseBtn.addEventListener("click", () => {
               if (!synth.speaking) {
                  return;
               }
               synth.pause();
               statusEl.textContent = "Speech synthesis paused";
            });

            // Resume only applies while the synthesizer reports that it is paused.
            resumeBtn.addEventListener("click", () => {
               if (!synth.paused) {
                  return;
               }
               synth.resume();
               statusEl.textContent = "Speech synthesis resumed";
            });

            // Cancel is unconditional: it is issued regardless of the current state.
            cancelBtn.addEventListener("click", () => {
               synth.cancel();
               statusEl.textContent = "Speech synthesis canceled";
            });
         } else {
            document.body.innerHTML =
               "<h1>Sorry, your browser does not support Speech Synthesis</h1>";
         }
      </script>
   </body>
</html>

本文标签: