My program gets a correct response from Google when the FLAC file was recorded manually with Windows Sound Recorder and then converted with a separate converter program.
But when I use a file that was recorded by my own program, I get "{"result":[]}" from Google. What should I do?
Here is my code:
The sender:
/// <summary>
/// Pumps everything that remains in <paramref name="fileStream"/> into
/// <paramref name="requestStream"/>, at most 32768 bytes per write.
/// </summary>
/// <param name="fileStream">Source stream; read from its current position until Read reports no more data.</param>
/// <param name="requestStream">Destination stream; it is written to but neither flushed nor closed here.</param>
private static void CopyStream(FileStream fileStream, Stream requestStream)
{
    byte[] chunk = new byte[32768];
    for (;;)
    {
        int count = fileStream.Read(chunk, 0, chunk.Length);
        if (count <= 0)
        {
            // Read returned nothing: the source is exhausted.
            break;
        }

        // Forward only the bytes actually read, not the whole chunk.
        requestStream.Write(chunk, 0, count);
    }
}
/// <summary>
/// Prepares <paramref name="request"/> as a chunked, keep-alive POST that carries
/// FLAC audio declared at rate 44100 and presents browser-like request headers.
/// </summary>
/// <param name="request">The request to configure; it is modified in place.</param>
private static void ConfigureRequest(HttpWebRequest request)
{
    const string browserUserAgent =
        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.121 Safari/535.2";

    // Transport settings.
    request.Method = "POST";
    request.KeepAlive = true;
    request.SendChunked = true;

    // Payload description.
    request.ContentType = "audio/x-flac; rate=44100";

    // Browser-like identification and content negotiation headers.
    request.UserAgent = browserUserAgent;
    request.Headers[HttpRequestHeader.AcceptEncoding] = "gzip,deflate,sdch";
    request.Headers[HttpRequestHeader.AcceptLanguage] = "en-GB,en-US;q=0.8,en;q=0.6";
    request.Headers[HttpRequestHeader.AcceptCharset] = "ISO-8859-1,utf-8;q=0.7,*;q=0.3";
}
// Uploads the recorded FLAC file to the speech endpoint and shows the raw
// response text in the 'state' control.
using (var fileStream = new FileStream(@"C:\Users\Ahmad Mustofa\Documents\Visual Studio 2010\Projects\FP\FP\bin\Debug\voice.flac", FileMode.Open))
{
    const string requestUrl = "https://www.google.com/speech-api/v2/recognize?output=json&lang=ar-sa&key=AIzaSyBJ6VJ326Rpb23msih2wGhXENEwU1TF1PA&client=chromium&maxresults=1&pfilter=2";
    var request = (HttpWebRequest)WebRequest.Create(requestUrl);
    ConfigureRequest(request);

    // Dispose the request stream as soon as the upload is written: this ends
    // the request body and releases the stream before the response is
    // requested (previously the stream was never closed).
    using (var requestStream = request.GetRequestStream())
    {
        CopyStream(fileStream, requestStream);
    }

    // The body is gunzipped unconditionally; this relies on the server
    // honouring the gzip Accept-Encoding that ConfigureRequest sends.
    using (var response = request.GetResponse())
    using (var responseStream = response.GetResponseStream())
    using (var zippedStream = new GZipStream(responseStream, CompressionMode.Decompress))
    using (var sr = new StreamReader(zippedStream))
    {
        var res = sr.ReadToEnd();
        state.Text = res;
    }
}
the wav recorder:
/// <summary>
/// DataAvailable handler: appends the bytes reported in <paramref name="e"/>
/// to the current wave writer and flushes it.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Carries the capture buffer and the number of bytes recorded into it.</param>
private void sourceStream_DataAvailable(object sender, NAudio.Wave.WaveInEventArgs e)
{
    // Without a writer there is nowhere to store the data, so the buffer is dropped.
    if (waveWriter != null)
    {
        // Only the first BytesRecorded bytes of the buffer are written.
        waveWriter.WriteData(e.Buffer, 0, e.BytesRecorded);
        waveWriter.Flush();
    }
}
// Starts capturing from the device selected in the 'hardware' list and writes
// the audio to voice.wav through 'waveWriter'.
fileName = "C:\\Users\\Ahmad Mustofa\\Documents\\Visual Studio 2010\\Projects\\FP\\FP\\bin\\debug\\voice.wav";
int deviceNumber = hardware.SelectedItems[0].Index;
try
{
    sourceStream = new NAudio.Wave.WaveIn();
    sourceStream.DeviceNumber = deviceNumber;

    // Record at 44100 using however many channels the device reports.
    var deviceChannels = NAudio.Wave.WaveIn.GetCapabilities(deviceNumber).Channels;
    sourceStream.WaveFormat = new NAudio.Wave.WaveFormat(44100, deviceChannels);

    sourceStream.DataAvailable += sourceStream_DataAvailable;

    // The writer uses the same format that was just assigned to the input.
    waveWriter = new NAudio.Wave.WaveFileWriter(fileName, sourceStream.WaveFormat);
    sourceStream.StartRecording();
}
catch (Exception ex)
{
    // Any failure during set-up is reported in the 'state' control.
    state.Text = "disini" + ex.Message;
}
flac converter:
// Converts the WAV file named by 'input' into a FLAC file of the same base
// name under the "flac" directory.
// NOTE(review): the input directory literal "wav " ends with a space — confirm
// this is intentional and not a typo for "wav".
string inputFile = Path.Combine("wav ", input);
string outputFile = Path.Combine("flac", Path.ChangeExtension(input, ".flac"));
if (!File.Exists(inputFile))
    throw new ApplicationException("Input file " + inputFile + " cannot be found!");
// NOTE(review): 'wav' is never released here; if WavReader is disposable it
// should be wrapped in a using block as well — confirm against its definition.
WavReader wav = new WavReader(inputFile);
using (var flacStream = File.Create(outputFile))
{
    FlacWriter flac = new FlacWriter(flacStream, wav.BitDepth, wav.Channels, wav.SampleRate);
    try
    {
        // Buffer for 1 second's worth of audio data
        byte[] buffer = new byte[wav.Bitrate / 8];
        int bytesRead;
        do
        {
            bytesRead = wav.InputStream.Read(buffer, 0, buffer.Length);
            flac.Convert(buffer, 0, bytesRead);
        } while (bytesRead > 0);
    }
    finally
    {
        // Dispose the writer even when reading or encoding throws, and do it
        // before flacStream itself is closed by the enclosing using block.
        flac.Dispose();
    }
}
Text-to-Speech is priced based on the number of characters sent to the service to be synthesized into audio each month. You must enable billing to use Text-to-Speech, and will be automatically charged if your usage exceeds the number of free characters allowed per month.
Google Cloud Text-to-Speech API (Beta) allows developers to include natural-sounding, synthetic human speech as playable audio in their applications.
Amberscript. Get the most accurate and one of the best speech-to-text APIs in the market – Amberscript. It provides custom ASR models according to your needs and lets you integrate them easily with your software for real-time audio and video files, texts perfected by humans, and phone calls.
I was also having the same issue but came up with a neat solution. I used Fiddler (http://www.telerik.com/fiddler/) to figure out how Chrome does the speech recognition and then created some code to emulate chrome sending the request. This approach uses a different URI and there is also a 16-character value called pair which is different for each request. I use a simple random value generator function to create one for the request and I also changed the output value to 'json'.
Note: The result can sometimes be empty as in your case above but there's also another json object in the response that contains the alternatives.
/// <summary>
/// Posts the FLAC file at C:\TestFolder\test_audio.flac to the full-duplex
/// speech endpoint and, when the reply is JSON, shows its text in textBox1.
/// </summary>
/// <remarks>
/// Failures while obtaining or reading the response are reported in a message
/// box. Failures while reading the file or writing the request body are not
/// caught here and propagate to the caller.
/// </remarks>
private void GoogleSpeechToText()
{
    // A fresh 16-character "pair" value is generated for every request.
    string uri = "https://www.google.com/speech-api/full-duplex/v1/up?output=json&key=AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw&pair=" + GenerateUnique(16) + "&lang=en-US&pFilter=2&maxAlternatives=10&client=chromium";
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(uri);
    request.Timeout = 10000;
    request.Method = "POST";
    request.Host = "www.google.com";
    request.KeepAlive = true;
    request.SendChunked = true;
    request.ContentType = "audio/x-flac; rate=16000";
    request.Headers.Set(HttpRequestHeader.AcceptLanguage, "en-GB,en-US;q=0.8,en;q=0.6");
    request.UserAgent = "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.143 Safari/537.36";

    string path = @"C:\TestFolder\test_audio.flac";

    // ReadAllBytes always returns the complete file; a single Stream.Read call
    // may return fewer bytes than requested and silently truncate the upload.
    byte[] data = File.ReadAllBytes(path);

    using (Stream reqStream = request.GetRequestStream())
    {
        reqStream.Write(data, 0, data.Length);
    }

    try
    {
        // Dispose the response and its stream so the connection is released
        // even when the content type does not match.
        using (WebResponse response = request.GetResponse())
        using (Stream respStream = response.GetResponseStream())
        {
            if (response.ContentType == "application/json; charset=utf-8")
            {
                using (var sr = new StreamReader(respStream))
                {
                    var res = sr.ReadToEnd();
                    textBox1.Text = res;
                }
            }
        }
    }
    catch (Exception ex) { MessageBox.Show(ex.Message, "Error", MessageBoxButtons.OK); }
}
// One generator shared by all calls. Creating a new Random per call can yield
// identical sequences for calls made in quick succession, because the default
// seed is time-based on .NET Framework.
private static readonly Random s_pairRandom = new Random();

/// <summary>
/// Builds a random string of <paramref name="length"/> characters drawn from
/// the upper-case letters A-Z and the digits 0-9.
/// </summary>
/// <param name="length">Number of characters to generate.</param>
/// <returns>
/// The generated string, or an empty string when <paramref name="length"/> is
/// zero or negative.
/// </returns>
/// <remarks>
/// Each position first chooses between the letter pool and the digit pool with
/// equal probability, then picks uniformly within that pool. The value is not
/// cryptographically strong and must not be used as a secret.
/// </remarks>
private string GenerateUnique(int length)
{
    if (length <= 0)
    {
        return string.Empty;
    }

    const string letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    const string digits = "0123456789";

    char[] chars = new char[length];

    // Random is not thread-safe, so access to the shared instance is serialized.
    lock (s_pairRandom)
    {
        for (int i = 0; i < length; i++)
        {
            string pool = s_pairRandom.Next(2) == 1 ? letters : digits;
            chars[i] = pool[s_pairRandom.Next(pool.Length)];
        }
    }

    return new string(chars);
}
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow! Thank you!
Donate Us With