I'm trying to do continuous speech-to-text transcription but can't seem to get webkitSpeechRecognition working as advertised. It seems to stop recording randomly (typically after a long period of silence) even though I set continuous = true and interimResults = true. I can't even figure out what is causing these random stoppages, even though I put a simple log statement in every single event handler that webkitSpeechRecognition has, based on the docs I could find. Here is the script I have (essentially a slight modification of the demo at https://www.google.com/intl/en/chrome/demos/speech.html):
What I know:
No errors are logged before the recording stops.
I'm on a gigabit connection, so it is not lag-related.
It stops normally after a long pause but sometimes stops without cause.
It can happen after 2 minutes or only 30 seconds.
I commented out some of the below but I've tried all of them without any success as to tracing the issue.
var final_transcript = '';
var recognizing = false;
var ignore_onend;
var start_timestamp;
if (!('webkitSpeechRecognition' in window)) {
upgrade();
} else {
var recognition = new webkitSpeechRecognition();
recognition.continuous = true;
recognition.interimResults = true;
recognition.onstart = function () {
recognizing = true;
};
recognition.onerror = function (event) {
console.log("ERROR")
recognizing = false
recognition.stop()
recognizing = true
recognition.start()
if (event.error == 'no-speech') {
console.log("NO SPEECH")
}
if (event.error == 'audio-capture') {
console.log("Capture Problem")
}
if (event.error == 'not-allowed') {
if (event.timeStamp - start_timestamp < 100) {
console.log("Block")
} else {
console.log("Deny")
}
}
};
recognition.onend = function () {
console.log("ONEND")
recognition.stop()
recognizing = false
recognition.start()
};
recognition.onresult = function (event) {
var interim_transcript = '';
if (typeof(event.results) == 'undefined') {
console.log("undefined start")
recognition.stop()
recognizing = false
recognition.start()
console.log("undefined end")
return;
}
for (var i = event.resultIndex; i < event.results.length; ++i) {
if (event.results[i].isFinal) {
final_transcript += event.results[i][0].transcript;
} else {
interim_transcript += event.results[i][0].transcript;
}
}
final_transcript = capitalize(final_transcript);
final_span.innerHTML = linebreak(final_transcript);
interim_span.innerHTML = linebreak(interim_transcript);
};
{# recognition.onspeechend = function () {#}
{# console.log("SpeechEND")#}
{# };#}
{##}
{# recognition.onpause = function() {#}
{# console.log("PAUSE")#}
{# }#}
{##}
{# recognition.onsoundend = function() {#}
{# console.log("Sound")#}
{# }#}
{##}
{##}
{# recognition.onaudioend = function() {#}
{# console.log("AUDIO")#}
{# }#}
{##}
{##}
{# recognition.onnomatch = function() {#}
{# console.log("NOMATCH")#}
{# }#}
{##}
{# recognition.onmark = function() {#}
{# console.log("MARK")#}
{# }#}
{##}
{# recognition.onboundary = function(){#}
{# console.log("BOUNDARY")#}
{# }#}
}
// Matches a blank line (two consecutive newlines).
var two_line = /\n\n/g;
// Matches any single newline.
var one_line = /\n/g;

/**
 * Converts newlines in a transcript to HTML markup.
 * Each pair of consecutive newlines becomes an empty paragraph, and every
 * remaining newline becomes a line break.
 * @param {string} s - Text to convert.
 * @returns {string} The text with newlines replaced by HTML tags.
 */
function linebreak(s) {
  var withParagraphs = s.replace(two_line, '<p></p>');
  var withBreaks = withParagraphs.replace(one_line, '<br>');
  return withBreaks;
}
// Matches the first non-whitespace character.
var first_char = /\S/;

/**
 * Upper-cases the first non-whitespace character of a string.
 * @param {string} s - Text to capitalize.
 * @returns {string} The text with its first non-whitespace character
 *   upper-cased; unchanged when it contains only whitespace.
 */
function capitalize(s) {
  var position = s.search(first_char);
  if (position === -1) {
    return s;
  }
  var head = s.slice(0, position);
  var tail = s.slice(position + 1);
  return head + s.charAt(position).toUpperCase() + tail;
}
// Start button: swaps the start/stop buttons, clears the previous
// transcript and starts a new recognition session.
$("#start_call").click(function (event) {
  // On browsers without webkitSpeechRecognition no recognizer is created,
  // so there is nothing to start.
  if (typeof recognition === 'undefined') {
    return;
  }
  $("#start_call").addClass('hidden');
  $("#end_call").removeClass('hidden');
  final_transcript = '';
  recognition.lang = 'en-US';
  // Reset before start() so the new session never sees a stale flag.
  ignore_onend = false;
  // onerror subtracts this from the error event's timeStamp to tell an
  // immediate block from a denial; without it the difference is NaN and
  // every 'not-allowed' error is logged as "Deny".
  start_timestamp = event.timeStamp;
  recognition.start();
});
// Stop button: swaps the buttons back, hides the stopwatch, asks the
// recognizer to stop and marks recognition as no longer running.
$("#end_call").click(function () {
  var stopButton = $("#end_call");
  var startButton = $("#start_call");

  stopButton.addClass('hidden');
  startButton.removeClass('hidden');
  stopwatchClock.addClass('hidden');

  recognition.stop();
  recognizing = false;
});
You can check a sample implementation in the file player.html on my GitHub page: https://github.com/pantprateek/genieYT. The logic is as follows:
Create a timer that stops recognition every 10 seconds.
// Stops the current recognition session.
function resetVoiceRecog() {
  recognition.stop();
}

// Call resetVoiceRecog every 10 seconds.
setInterval(resetVoiceRecog, 10 * 1000);
When recognition.stop() is called, it triggers onend, and the onend handler then starts recognition again.
// Start recognition again each time the end event fires.
recognition.onend = function onRecognitionEnd(event) {
  recognition.start();
};
This method works for me for hours, even if I don't speak a word. It might help.
If you love us, you can donate to us via PayPal or buy me a coffee so we can maintain and grow. Thank you!
Donate Us With