I'm using the Google Cloud API for Speech-to-text, with a NodeJS back-end. The app needs to be able to listen for voice commands, and transmit them to the back-end as a buffer. For this, I need to send the buffer of the preceding audio when silence is detected.
Any help would be appreciated. I've included the JS code below.
// Request microphone access and pass the resulting MediaStream to `success`.
// The promise-based navigator.mediaDevices.getUserMedia is preferred because the
// callback-style navigator.getUserMedia is deprecated and missing in some
// browsers; the vendor-prefixed legacy variants are kept as a fallback.
(function requestMicrophone() {
  var constraints = {audio: true};

  // Shared failure handler: keep the user-facing alert, but also log the
  // underlying error so the cause is not lost.
  function onCaptureError(err) {
    console.error('Error capturing audio.', err);
    alert('Error capturing audio.');
  }

  // Legacy shim: expose whichever prefixed implementation exists under the
  // unprefixed name, exactly as before.
  if (!navigator.getUserMedia) {
    navigator.getUserMedia = navigator.webkitGetUserMedia ||
      navigator.mozGetUserMedia || navigator.msGetUserMedia;
  }

  if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
    // Two-argument then(): only a failed capture reaches onCaptureError,
    // matching the semantics of the legacy error callback.
    navigator.mediaDevices.getUserMedia(constraints).then(success, onCaptureError);
  } else if (navigator.getUserMedia) {
    navigator.getUserMedia(constraints, success, onCaptureError);
  } else {
    alert('getUserMedia not supported in this browser.');
  }
})();
// Capture gate: the audio-processing callback returns early while this is false.
var recording = false;

/** Opens the capture gate so incoming audio buffers are processed. */
window.startRecording = () => {
  recording = true;
};

/** Closes the capture gate; buffers arriving afterwards are ignored. */
window.stopRecording = () => {
  recording = false;
  // NOTE(review): a call to window.Stream.end() was left disabled here:
  // window.Stream.end();
};
/**
 * getUserMedia success callback: routes the captured stream through a
 * ScriptProcessorNode and logs each converted buffer while `recording` is true.
 *
 * The created objects are published on `window` under the same names the
 * previous implementation leaked as implicit globals (`audioContext`,
 * `context`, `audioInput`, `recorder`), so existing readers keep working while
 * the function no longer throws in strict mode / ES modules.
 *
 * @param {MediaStream} e - the stream delivered by getUserMedia.
 */
function success(e) {
  var AudioContextClass = window.AudioContext || window.webkitAudioContext;
  var ctx = new AudioContextClass();
  // the sample rate is in ctx.sampleRate (also reachable as window.context.sampleRate)
  var source = ctx.createMediaStreamSource(e);
  var bufferSize = 4096;
  var processor = ctx.createScriptProcessor(bufferSize, 1, 1);

  processor.onaudioprocess = function (event) {
    if (!recording) return;
    console.log('recording');
    // Only the first (left) channel is read; the node is created with one input channel.
    var left = event.inputBuffer.getChannelData(0);
    // convertoFloat32ToInt16 is not defined in this snippet; it is expected to exist elsewhere.
    console.log(convertoFloat32ToInt16(left));
  };

  source.connect(processor);
  processor.connect(ctx.destination);

  // Explicit globals, replacing the former undeclared assignments.
  window.audioContext = AudioContextClass;
  window.context = ctx;
  window.audioInput = source;
  window.recorder = processor;
}
I'm not too sure as to what exactly is being asked in the question, so this answer is only intended to give a way to detect silences in an AudioStream.
To detect silence in an audio stream, you can use an AnalyserNode, on which you call the getByteFrequencyData
method at regular intervals, and check whether there were sounds higher than your expected level for a given time.
You can set the threshold level directly with the minDecibels
property of the AnalyserNode.
/**
 * Watches a MediaStream and reports transitions between sound and silence.
 *
 * The stream is fed into an AnalyserNode whose frequency data is polled once
 * per animation frame. Any non-zero bin counts as sound; `min_decibels` is
 * applied as the analyser's `minDecibels`.
 *
 * @param {MediaStream} stream - audio stream to analyse.
 * @param {Function} [onSoundEnd] - called once when no sound has been seen for
 *   longer than `silence_delay`.
 * @param {Function} [onSoundStart] - called when sound is seen again after
 *   `onSoundEnd` has fired.
 * @param {number} [silence_delay=500] - silence duration, in milliseconds,
 *   required before `onSoundEnd` fires.
 * @param {number} [min_decibels=-80] - value assigned to `analyser.minDecibels`.
 * @returns {Function} stop function: cancels the polling loop, disconnects the
 *   source node and closes the AudioContext. It returns the promise from
 *   `AudioContext.close()` and is safe to call more than once.
 */
function detectSilence(
  stream,
  onSoundEnd = _=>{},
  onSoundStart = _=>{},
  silence_delay = 500,
  min_decibels = -80
) {
  const ctx = new AudioContext();
  const analyser = ctx.createAnalyser();
  const streamNode = ctx.createMediaStreamSource(stream);
  streamNode.connect(analyser);
  analyser.minDecibels = min_decibels;

  const data = new Uint8Array(analyser.frequencyBinCount); // will hold our data
  let silence_start = performance.now();
  let triggered = false; // trigger only once per silence event
  let frameId = null; // id of the pending animation frame, for cancellation
  let closing = null; // promise from ctx.close() once stop() has been called

  function loop(time) {
    if (closing !== null) return; // stopped: do not reschedule or fire callbacks
    frameId = requestAnimationFrame(loop); // poll again on the next animation frame
    analyser.getByteFrequencyData(data); // get current data
    if (data.some(v => v)) { // if there is data above the given db limit
      if (triggered) {
        triggered = false;
        onSoundStart();
      }
      silence_start = time; // set it to now
    }
    if (!triggered && time - silence_start > silence_delay) {
      onSoundEnd();
      triggered = true;
    }
  }

  // Seed the first tick with a real timestamp. Calling loop() with no argument
  // could store `undefined` in silence_start, making every later comparison
  // NaN and suppressing onSoundEnd until the next sound.
  loop(performance.now());

  return function stop() {
    if (closing === null) {
      cancelAnimationFrame(frameId);
      streamNode.disconnect();
      closing = ctx.close();
    }
    return closing;
  };
}
/** Silence callback passed to detectSilence: writes a marker to the console. */
function onSilence() {
  const message = 'silence';
  console.log(message);
}
/** Sound-resumed callback passed to detectSilence: writes a marker to the console. */
function onSpeak() {
  const message = 'speaking';
  console.log(message);
}
// Ask for microphone access, then start silence detection on the granted stream.
navigator.mediaDevices
  .getUserMedia({ audio: true })
  .then(function startDetection(stream) {
    detectSilence(stream, onSilence, onSpeak);
    // do something else with the stream
  })
  .catch(function reportFailure(err) {
    // Covers both a rejected permission request and errors thrown above.
    console.error(err);
  });
And as a fiddle since stackSnippets may block gUM.
The simplest approach would be to use the `.pause()`, `.resume()` and `.stop()` methods of `MediaRecorder()` to allow the user to start, pause, and stop recording audio captured with `navigator.mediaDevices.getUserMedia()`, and to convert the resulting `Blob` to an `ArrayBuffer`, if that is what the API expects to be `POST`ed to the server.
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <title>User Media Recording</title>
</head>
<body>
  <input type="button" value="Start/resume recording audio" id="start">
  <input type="button" value="Pause recording audio" id="pause">
  <input type="button" value="Stop recording audio" id="stop">
  <script>
    // Records microphone audio with MediaRecorder. The three buttons start/resume,
    // pause and stop the recorder; every delivered chunk is converted to an
    // ArrayBuffer and logged.
    navigator.mediaDevices.getUserMedia({
        audio: true
      })
      .then(stream => {
        const recorder = new MediaRecorder(stream);
        const startButton = document.getElementById("start");
        const pauseButton = document.getElementById("pause");
        const stopButton = document.getElementById("stop");

        recorder.ondataavailable = async (e) => {
          // NOTE(review): the Stop handler ends the tracks right after calling
          // recorder.stop(), so the stream is probably already inactive when the
          // final chunk arrives and that chunk is skipped here - confirm this is
          // intended.
          if (!stream.active) {
            return;
          }
          try {
            // Read the Blob directly instead of going through a blob: URL and
            // fetch(); the object URL was never revoked and leaked per chunk.
            const ab = await new Response(e.data).arrayBuffer();
            // do stuff with `ArrayBuffer` of recorded audio
            console.log(ab);
          } catch (err) {
            // Rethrowing from an async event handler only yields an unhandled
            // rejection, so report the failure instead.
            console.error(err);
          }
        };

        // Pausing flushes what has been recorded so far to ondataavailable.
        recorder.onpause = () => {
          console.log(`recorder ${recorder.state}`);
          recorder.requestData();
        };

        stream.oninactive = () => {
          console.log("stream ended");
        };

        startButton.onclick = () => {
          if (recorder.state === "inactive") {
            recorder.start();
          } else {
            recorder.resume();
          }
          console.log("recorder.state:", recorder.state);
        };

        pauseButton.onclick = () => {
          if (recorder.state === "recording") {
            recorder.pause();
          }
          console.log("recorder.state:", recorder.state);
        };

        stopButton.onclick = () => {
          if (recorder.state === "recording" || recorder.state === "paused") {
            recorder.stop();
          }
          // Release the microphone.
          for (const track of stream.getTracks()) {
            track.stop();
          }
          // The stream cannot be reused once its tracks have ended, so disable
          // the start and pause buttons' handlers.
          startButton.onclick = null;
          pauseButton.onclick = null;
          console.log("recorder.state:", recorder.state,
            "stream.active", stream.active);
        };
      })
      .catch(err => {
        console.error(err);
      });
  </script>
</body>
</html>
plnkr https://plnkr.co/edit/7caWYMsvub90G6pwDdQp?p=preview
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With