Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Android audio capture silence detection

Tags:

java

android

I found this android code to record the user sound when he/she starts to speak and stop recording when he/she stops. But the problem is that the recording stops very quickly. If one wants to say two words, it just records the first.

How can the following code be changed to make the recording process less sensitive to momentary silence?

public void onCreate(Bundle savedInstanceState) {
    super.onCreate(savedInstanceState);
    setContentView(R.layout.main);

    final int RECORDER_BPP = 16;
    int RECORDER_SAMPLERATE = 8000;
    int RECORDER_CHANNELS = AudioFormat.CHANNEL_IN_MONO;
    int RECORDER_AUDIO_ENCODING = AudioFormat.ENCODING_PCM_16BIT;

    // Get the minimum buffer size required for the successful creation of
    // an AudioRecord object.
    int bufferSizeInBytes = AudioRecord
            .getMinBufferSize(RECORDER_SAMPLERATE, RECORDER_CHANNELS,
                    RECORDER_AUDIO_ENCODING);
    // Initialize Audio Recorder.
    AudioRecord audioRecorder = new AudioRecord(
            MediaRecorder.AudioSource.MIC, RECORDER_SAMPLERATE,
            RECORDER_CHANNELS, RECORDER_AUDIO_ENCODING, bufferSizeInBytes);
    // Start Recording.
    audioRecorder.startRecording();

    int numberOfReadBytes = 0;
    byte audioBuffer[] = new byte[bufferSizeInBytes];
    boolean recording = false;
    float tempFloatBuffer[] = new float[3];
    int tempIndex = 0;
    int totalReadBytes = 0;
    byte totalByteBuffer[] = new byte[60 * 44100 * 2];

    // While data come from microphone.
    while (true) {
        float totalAbsValue = 0.0f;
        short sample = 0;

        numberOfReadBytes = audioRecorder.read(audioBuffer, 0,
                bufferSizeInBytes);

        // Analyze Sound.
        for (int i = 0; i < bufferSizeInBytes; i += 2) {
            sample = (short) ((audioBuffer[i]) | audioBuffer[i + 1] << 8);
            totalAbsValue += Math.abs(sample) / (numberOfReadBytes / 2);
        }

        // Analyze temp buffer.
        tempFloatBuffer[tempIndex % 3] = totalAbsValue;
        float temp = 0.0f;
        for (int i = 0; i < 3; ++i)
            temp += tempFloatBuffer[i];

        if ((temp >= 0 && temp <= 350) && recording == false) {
            Log.i("TAG", "1");
            tempIndex++;
            continue;
        }

        if (temp > 350 && recording == false) {
            Log.i("TAG", "2");
            recording = true;
        }

        if ((temp >= 0 && temp <= 350) && recording == true) {
            Log.i("TAG", "Save audio to file.");

            // Save audio to file.
            String filepath = Environment.getExternalStorageDirectory()
                    .getPath();
            File file = new File(filepath, "AudioRecorder");
            if (!file.exists())
                file.mkdirs();

            String fn = file.getAbsolutePath() + "/"
                    + System.currentTimeMillis() + ".wav";

            long totalAudioLen = 0;
            long totalDataLen = totalAudioLen + 36;
            long longSampleRate = RECORDER_SAMPLERATE;
            int channels = 1;
            long byteRate = RECORDER_BPP * RECORDER_SAMPLERATE * channels
                    / 8;
            totalAudioLen = totalReadBytes;
            totalDataLen = totalAudioLen + 36;
            byte finalBuffer[] = new byte[totalReadBytes + 44];

            finalBuffer[0] = 'R'; // RIFF/WAVE header
            finalBuffer[1] = 'I';
            finalBuffer[2] = 'F';
            finalBuffer[3] = 'F';
            finalBuffer[4] = (byte) (totalDataLen & 0xff);
            finalBuffer[5] = (byte) ((totalDataLen >> 8) & 0xff);
            finalBuffer[6] = (byte) ((totalDataLen >> 16) & 0xff);
            finalBuffer[7] = (byte) ((totalDataLen >> 24) & 0xff);
            finalBuffer[8] = 'W';
            finalBuffer[9] = 'A';
            finalBuffer[10] = 'V';
            finalBuffer[11] = 'E';
            finalBuffer[12] = 'f'; // 'fmt ' chunk
            finalBuffer[13] = 'm';
            finalBuffer[14] = 't';
            finalBuffer[15] = ' ';
            finalBuffer[16] = 16; // 4 bytes: size of 'fmt ' chunk
            finalBuffer[17] = 0;
            finalBuffer[18] = 0;
            finalBuffer[19] = 0;
            finalBuffer[20] = 1; // format = 1
            finalBuffer[21] = 0;
            finalBuffer[22] = (byte) channels;
            finalBuffer[23] = 0;
            finalBuffer[24] = (byte) (longSampleRate & 0xff);
            finalBuffer[25] = (byte) ((longSampleRate >> 8) & 0xff);
            finalBuffer[26] = (byte) ((longSampleRate >> 16) & 0xff);
            finalBuffer[27] = (byte) ((longSampleRate >> 24) & 0xff);
            finalBuffer[28] = (byte) (byteRate & 0xff);
            finalBuffer[29] = (byte) ((byteRate >> 8) & 0xff);
            finalBuffer[30] = (byte) ((byteRate >> 16) & 0xff);
            finalBuffer[31] = (byte) ((byteRate >> 24) & 0xff);
            finalBuffer[32] = (byte) (2 * 16 / 8); // block align
            finalBuffer[33] = 0;
            finalBuffer[34] = RECORDER_BPP; // bits per sample
            finalBuffer[35] = 0;
            finalBuffer[36] = 'd';
            finalBuffer[37] = 'a';
            finalBuffer[38] = 't';
            finalBuffer[39] = 'a';
            finalBuffer[40] = (byte) (totalAudioLen & 0xff);
            finalBuffer[41] = (byte) ((totalAudioLen >> 8) & 0xff);
            finalBuffer[42] = (byte) ((totalAudioLen >> 16) & 0xff);
            finalBuffer[43] = (byte) ((totalAudioLen >> 24) & 0xff);

            for (int i = 0; i < totalReadBytes; ++i)
                finalBuffer[44 + i] = totalByteBuffer[i];

            FileOutputStream out;
            try {
                out = new FileOutputStream(fn);
                try {
                    out.write(finalBuffer);
                    out.close();
                } catch (IOException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }

            } catch (FileNotFoundException e1) {
                // TODO Auto-generated catch block
                e1.printStackTrace();
            }

            // */
            tempIndex++;
            break;
        }

        // -> Recording sound here.
        Log.i("TAG", "Recording Sound.");
        for (int i = 0; i < numberOfReadBytes; i++)
            totalByteBuffer[totalReadBytes + i] = audioBuffer[i];
        totalReadBytes += numberOfReadBytes;
        // */

        tempIndex++;

    }
}
like image 855
amir secretery Avatar asked Oct 02 '13 19:10

amir secretery


2 Answers

I don't like the way you wrote your code. Use mine. You can change the threshold variable according to your voice volume. In this sample app there is no button. When you launch the app, it starts to listen, when you press back button, the aquisition is stopped and the file is stored on phone memory, folder AudioRecorder, with a new file each time you run it. read even comments and commented code. Note: the voice is appended to a temporary file each time the voice exceeds the threshold. The wav header is added when you manually stop the recording by pressing back (the wav file is created from the temporary file, with a new unique name) . If you need to create a new file each time the voice is revealed, you can easily modify it according to your needs, but you should always pass through temporary file saving. If you need a sort of delay, in order to record more, even when there is no voice (after the voice), just continue to save the data while your delay is elapsed. You can create the delay, counting the elapsed time (System.nanotime) since last found Peak (indicating voice presence).

Don't forget to mark the accepted solution (i tested it).

 package com.example.testaudiocapturewiththreshold;

 import android.os.Bundle;
 import android.app.Activity;
 import android.view.Menu;


 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.IOException;

 import android.media.AudioFormat;
 import android.media.AudioRecord;
 import android.media.MediaRecorder;
 import android.os.AsyncTask;
 import android.os.Environment;
 import android.os.Handler;
 import android.util.Log;

 public class TestAudioCaptureWithThreshold extends Activity {

private static final String TAG = TestAudioCaptureWithThreshold.class.getSimpleName();
private static final int RECORDER_BPP = 16;
private static final String AUDIO_RECORDER_FILE_EXT_WAV = ".wav";
private static final String AUDIO_RECORDER_FOLDER = "AudioRecorder";
private static final String AUDIO_RECORDER_TEMP_FILE = "record_temp.raw";

FileOutputStream os = null;

int bufferSize ;
int frequency = 44100; //8000;
int channelConfiguration = AudioFormat.CHANNEL_IN_MONO;
int audioEncoding = AudioFormat.ENCODING_PCM_16BIT;
boolean started = false;
  RecordAudio recordTask;

short threshold=15000; 

boolean debug=false;

@Override
protected void onCreate(Bundle savedInstanceState) {
    Log.w(TAG, "onCreate");
    super.onCreate(savedInstanceState);
    setContentView(R.layout.activity_test_audio_capture_with_threshold);

    startAquisition();
}


@Override
 protected void onResume() {
    Log.w(TAG, "onResume");
    super.onResume();


}

@Override
protected void onDestroy() {
    Log.w(TAG, "onDestroy");
    stopAquisition();
    super.onDestroy();

}

public class RecordAudio extends AsyncTask<Void, Double, Void> {

    @Override
    protected Void doInBackground(Void... arg0) {
        Log.w(TAG, "doInBackground");
        try {

                String filename = getTempFilename();

            try {
                        os = new FileOutputStream(filename);
            } catch (FileNotFoundException e) {
                        e.printStackTrace();
            }   


            bufferSize = AudioRecord.getMinBufferSize(frequency, 
            channelConfiguration, audioEncoding); 

            AudioRecord audioRecord = new AudioRecord( MediaRecorder.AudioSource.MIC, frequency, 
                    channelConfiguration, audioEncoding, bufferSize); 

            short[] buffer = new short[bufferSize];

            audioRecord.startRecording();

            while (started) {
                int bufferReadResult = audioRecord.read(buffer, 0,bufferSize);
                if(AudioRecord.ERROR_INVALID_OPERATION != bufferReadResult){
                      //check signal
                    //put a threshold
                      int foundPeak=searchThreshold(buffer,threshold);
                        if (foundPeak>-1){ //found signal
                                                //record signal
                            byte[] byteBuffer =ShortToByte(buffer,bufferReadResult);
                        try {
                                os.write(byteBuffer);
                        } catch (IOException e) {
                                e.printStackTrace();
                        }
                        }else{//count the time
                            //don't save signal
                        }


                                //show results
                        //here, with publichProgress function, if you calculate the total saved samples, 
                        //you can optionally show the recorded file length in seconds:      publishProgress(elsapsedTime,0);


                }
            }

            audioRecord.stop();


            //close file
              try {
                    os.close();
              } catch (IOException e) {
                    e.printStackTrace();
              }

              copyWaveFile(getTempFilename(),getFilename());
              deleteTempFile();


        } catch (Throwable t) {
            t.printStackTrace();
            Log.e("AudioRecord", "Recording Failed");
        }
        return null;

    } //fine di doInBackground

      byte [] ShortToByte(short [] input, int elements) {
      int short_index, byte_index;
      int iterations = elements; //input.length;
      byte [] buffer = new byte[iterations * 2];

      short_index = byte_index = 0;

      for(/*NOP*/; short_index != iterations; /*NOP*/)
      {
        buffer[byte_index]     = (byte) (input[short_index] & 0x00FF); 
        buffer[byte_index + 1] = (byte) ((input[short_index] & 0xFF00) >> 8);

        ++short_index; byte_index += 2;
      }

      return buffer;
    }


    int searchThreshold(short[]arr,short thr){
        int peakIndex;
        int arrLen=arr.length;
        for (peakIndex=0;peakIndex<arrLen;peakIndex++){
            if ((arr[peakIndex]>=thr) || (arr[peakIndex]<=-thr)){
                //se supera la soglia, esci e ritorna peakindex-mezzo kernel.

                return peakIndex;
            }
        }
        return -1; //not found
    }

    /*
    @Override
    protected void onProgressUpdate(Double... values) {
        DecimalFormat sf = new DecimalFormat("000.0000");           
        elapsedTimeTxt.setText(sf.format(values[0]));

    }
    */

    private String getFilename(){
        String filepath = Environment.getExternalStorageDirectory().getPath();
        File file = new File(filepath,AUDIO_RECORDER_FOLDER);

        if(!file.exists()){
                file.mkdirs();
        }

        return (file.getAbsolutePath() + "/" + System.currentTimeMillis() + AUDIO_RECORDER_FILE_EXT_WAV);
    }


    private String getTempFilename(){
        String filepath = Environment.getExternalStorageDirectory().getPath();
        File file = new File(filepath,AUDIO_RECORDER_FOLDER);

        if(!file.exists()){
                file.mkdirs();
        }

        File tempFile = new File(filepath,AUDIO_RECORDER_TEMP_FILE);

        if(tempFile.exists())
                tempFile.delete();

        return (file.getAbsolutePath() + "/" + AUDIO_RECORDER_TEMP_FILE);
    }





    private void deleteTempFile() {
            File file = new File(getTempFilename());

            file.delete();
    }

    private void copyWaveFile(String inFilename,String outFilename){
        FileInputStream in = null;
        FileOutputStream out = null;
        long totalAudioLen = 0;
        long totalDataLen = totalAudioLen + 36;
        long longSampleRate = frequency;
        int channels = 1;
        long byteRate = RECORDER_BPP * frequency * channels/8;

        byte[] data = new byte[bufferSize];

        try {
                in = new FileInputStream(inFilename);
                out = new FileOutputStream(outFilename);
                totalAudioLen = in.getChannel().size();
                totalDataLen = totalAudioLen + 36;


                WriteWaveFileHeader(out, totalAudioLen, totalDataLen,
                                longSampleRate, channels, byteRate);

                while(in.read(data) != -1){
                        out.write(data);
                }

                in.close();
                out.close();
        } catch (FileNotFoundException e) {
                e.printStackTrace();
        } catch (IOException e) {
                e.printStackTrace();
        }
    }

    private void WriteWaveFileHeader(
                    FileOutputStream out, long totalAudioLen,
                    long totalDataLen, long longSampleRate, int channels,
                    long byteRate) throws IOException {

            byte[] header = new byte[44];

            header[0] = 'R';  // RIFF/WAVE header
            header[1] = 'I';
            header[2] = 'F';
            header[3] = 'F';
            header[4] = (byte) (totalDataLen & 0xff);
            header[5] = (byte) ((totalDataLen >> 8) & 0xff);
            header[6] = (byte) ((totalDataLen >> 16) & 0xff);
            header[7] = (byte) ((totalDataLen >> 24) & 0xff);
            header[8] = 'W';
            header[9] = 'A';
            header[10] = 'V';
            header[11] = 'E';
            header[12] = 'f';  // 'fmt ' chunk
            header[13] = 'm';
            header[14] = 't';
            header[15] = ' ';
            header[16] = 16;  // 4 bytes: size of 'fmt ' chunk
            header[17] = 0;
            header[18] = 0;
            header[19] = 0;
            header[20] = 1;  // format = 1
            header[21] = 0;
            header[22] = (byte) channels;
            header[23] = 0;
            header[24] = (byte) (longSampleRate & 0xff);
            header[25] = (byte) ((longSampleRate >> 8) & 0xff);
            header[26] = (byte) ((longSampleRate >> 16) & 0xff);
            header[27] = (byte) ((longSampleRate >> 24) & 0xff);
            header[28] = (byte) (byteRate & 0xff);
            header[29] = (byte) ((byteRate >> 8) & 0xff);
            header[30] = (byte) ((byteRate >> 16) & 0xff);
            header[31] = (byte) ((byteRate >> 24) & 0xff);
            header[32] = (byte) (channels * 16 / 8);  // block align
            header[33] = 0;
            header[34] = RECORDER_BPP;  // bits per sample
            header[35] = 0;
            header[36] = 'd';
            header[37] = 'a';
            header[38] = 't';
            header[39] = 'a';
            header[40] = (byte) (totalAudioLen & 0xff);
            header[41] = (byte) ((totalAudioLen >> 8) & 0xff);
            header[42] = (byte) ((totalAudioLen >> 16) & 0xff);
            header[43] = (byte) ((totalAudioLen >> 24) & 0xff);

            out.write(header, 0, 44);
    }

} //Fine Classe RecordAudio (AsyncTask)

@Override
public boolean onCreateOptionsMenu(Menu menu) {
    getMenuInflater().inflate(R.menu.test_audio_capture_with_threshold,
            menu);
    return true;

}


public void resetAquisition() {
    Log.w(TAG, "resetAquisition");
    stopAquisition();
  //startButton.setText("WAIT");
  startAquisition();
}

public void stopAquisition() {
    Log.w(TAG, "stopAquisition");
    if (started) {
        started = false;
        recordTask.cancel(true);
    }
}

public void startAquisition(){
    Log.w(TAG, "startAquisition");
    Handler handler = new Handler(); 
    handler.postDelayed(new Runnable() { 
         public void run() { 

            //elapsedTime=0;
            started = true;
            recordTask = new RecordAudio();
            recordTask.execute();
            //startButton.setText("RESET");
         } 
    }, 500); 
}


 }

Don't forget to add permissions to manifest file:

 <?xml version="1.0" encoding="utf-8"?>
 <manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="com.example.testaudiocapturewiththreshold"
android:versionCode="1"
android:versionName="1.0" >
<uses-permission android:name="android.permission.MODIFY_AUDIO_SETTINGS"/>
<uses-permission android:name="android.permission.RECORD_AUDIO" />
<uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE" />

<uses-sdk
    android:minSdkVersion="8"
    android:targetSdkVersion="17" />

<application
    android:allowBackup="true"
    android:icon="@drawable/ic_launcher"
    android:label="@string/app_name"
    android:theme="@style/AppTheme" >
    <activity
        android:name="com.example.testaudiocapturewiththreshold.TestAudioCaptureWithThreshold"
        android:label="@string/app_name" >
        <intent-filter>
            <action android:name="android.intent.action.MAIN" />

            <category android:name="android.intent.category.LAUNCHER" />
        </intent-filter>
    </activity>
</application>

 </manifest>
like image 130
Gaucho Avatar answered Sep 29 '22 21:09

Gaucho


For those who didn't find an answer yet.It is possible to detect the silence and stop recording when the user hasn't spoken for some seconds.

In order to calculate if the user has stopped speaking we can take the data from the last second of the recording, map it to a number and compare this number to the numbers obtained previously. Were turn a confidence score (0-INF) of a longer pause having occurred in the speech input.

Follow this link everything is explained there. https://github.com/Kaljurand/speechutils/blob/master/app/src/main/java/ee/ioc/phon/android/speechutils/AbstractAudioRecorder.java thanks to Kaljurand and his wonderful code.

You can find the entire project in this link. https://github.com/Kaljurand/K6nele.

like image 23
sunil sunny Avatar answered Sep 29 '22 22:09

sunil sunny