Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Superpowered: can't get TimeStretching to work correctly, the output sound is distorted

I'm trying to use the Superpowered SDK to apply real-time time stretching and pitch shifting to an MP3 file that is being played and recorded at the same time. The problem is that no matter what I do, the output sound quality is terrible (to the point of being distorted).
I suspect that it's due to a mismatch in the number of samples per frame. Here is the complete source code of my cpp file:

// Shared state for the native audio pipeline. Everything here is created by the
// JNI entry points below (InitAudio / OpenFile) and read by audioProcessing.

// Audio output stream; constructed in OpenFile with audioProcessing as its callback.
static SuperpoweredAndroidAudioIO *audioIO;
// Time stretcher / pitch shifter; constructed in OpenFile.
static SuperpoweredTimeStretching *stretching;
// List that receives the output of stretching->process(); constructed in InitAudio.
static SuperpoweredAudiopointerList *outputBuffers;
// Decoder for the source file; constructed in InitAudio, opened in OpenFile.
static SuperpoweredDecoder *decoder;
// Recorder fed with the processed audio; constructed in OpenFile.
static SuperpoweredRecorder *recorder;
// Paths obtained from the Java side in InitAudio via GetStringUTFChars.
const char *outFilePath;
const char *tempFilePath;

// Buffer that receives the 16-bit samples produced by decoder->decode(); allocated in OpenFile.
static short int *intBuffer;
// Floating point scratch buffer; allocated in OpenFile.
static float *playerBuffer;

// NOTE(review): never written in the visible code - confirm whether it is still needed.
bool audioInitialized = false;
// When false, audioProcessing produces no output. Cleared by audioProcessing at end of stream.
bool playing = false;

static bool audioProcessing(
        void *__unused clientData, // custom pointer
        short int *audio,           // buffer of interleaved samples
        int numberOfFrames,         // number of frames to process
        int __unused sampleRate     // sampling rate
) {

    if (playing) {
        unsigned int samplesDecoded = decoder->samplesPerFrame;
        if (decoder->decode(intBuffer, &samplesDecoded) == SUPERPOWEREDDECODER_ERROR) return false;
        if (samplesDecoded < 1) {
            playing = false;
            return false;
        }



        SuperpoweredAudiobufferlistElement inputBuffer;
        inputBuffer.samplePosition = decoder->samplePosition;
        inputBuffer.startSample = 0;
        inputBuffer.samplesUsed = 0;
        inputBuffer.endSample = samplesDecoded;
        inputBuffer.buffers[0] = SuperpoweredAudiobufferPool::getBuffer(samplesDecoded * 8 + 64);
        inputBuffer.buffers[1] = inputBuffer.buffers[2] = inputBuffer.buffers[3] = NULL;


        SuperpoweredShortIntToFloat(intBuffer, (float *) inputBuffer.buffers[0], samplesDecoded);

        stretching->process(&inputBuffer, outputBuffers);

        if (outputBuffers->makeSlice(0, outputBuffers->sampleLength)) {

            while (true) { 
                int numSamples = 0;
                float *timeStretchedAudio = (float *) outputBuffers->nextSliceItem(&numSamples);
                if (!timeStretchedAudio) break;

                SuperpoweredFloatToShortInt(timeStretchedAudio, intBuffer,
                                            (unsigned int) numSamples);
                SuperpoweredShortIntToFloat(intBuffer, playerBuffer, (unsigned int) numSamples);

                recorder->process(playerBuffer, (unsigned int) numSamples);
                SuperpoweredFloatToShortInt(playerBuffer, audio, (unsigned int) numSamples);

            };
            outputBuffers->clear();
            return true;
        };
    }
    return false;
}


/**
 * JNI entry point: creates the decoder and the output buffer list, and stores
 * the output and temporary file paths for later use.
 *
 * @param env        JNI environment, used to read the Java strings
 * @param obj        unused
 * @param bufferSize unused
 * @param sampleRate unused
 * @param outputPath path stored in outFilePath
 * @param tempPath   path stored in tempFilePath
 *
 * NOTE(review): the strings obtained with GetStringUTFChars are kept in globals
 * and never released with ReleaseStringUTFChars. They must stay valid because
 * tempFilePath is read later, but copying them and releasing the JNI strings
 * would avoid holding JVM resources for the lifetime of the process.
 */
extern "C" JNIEXPORT void
Java_com_example_activities_DubsmashActivity_InitAudio(
        JNIEnv *env,
        jobject  __unused obj,
        jint __unused bufferSize,
        jint __unused sampleRate,
        jstring outputPath,
        jstring tempPath
) {
    decoder = new SuperpoweredDecoder();

    outputBuffers = new SuperpoweredAudiopointerList(8, 16);

    outFilePath = env->GetStringUTFChars(outputPath, NULL);
    tempFilePath = env->GetStringUTFChars(tempPath, NULL);
}

/**
 * JNI entry point: opens the source file with the decoder and builds the rest
 * of the pipeline (work buffers, audio output, time stretcher, recorder) using
 * the sample rate and frame size reported by the decoder.
 *
 * Expects the global decoder to have been created beforehand.
 *
 * @param env      JNI environment
 * @param obj      unused
 * @param filePath path of the audio file to open
 * @return 0 on success, -1 if the path could not be read or the file could not
 *         be opened.
 */
extern "C" JNIEXPORT jdouble
Java_com_example_activities_DubsmashActivity_OpenFile(
        JNIEnv *env,
        jobject  __unused obj,
        jstring filePath) {
    const char *path = env->GetStringUTFChars(filePath, NULL);
    if (path == NULL) return -1;

    // A non-zero/non-null result from open() signals failure; without this
    // check the decoder's sample rate and frame size would be used unvalidated.
    const bool openFailed = decoder->open(path) != 0;
    // The UTF-8 copy is only needed for the open() call itself.
    env->ReleaseStringUTFChars(filePath, path);
    if (openFailed) return -1;

    // Both buffers hold one decoded stereo frame plus generous headroom.
    // playerBuffer stores floats, so it must be sized with sizeof(float).
    intBuffer = (short int *) malloc(decoder->samplesPerFrame * 2 * sizeof(short int) + 32768);
    playerBuffer = (float *) malloc(decoder->samplesPerFrame * 2 * sizeof(float) + 32768);

    audioIO = new SuperpoweredAndroidAudioIO(
            decoder->samplerate,
            decoder->samplesPerFrame,
            false,
            true,
            audioProcessing,
            NULL,
            -1, -1,
            decoder->samplesPerFrame * 2
    );

    stretching = new SuperpoweredTimeStretching(decoder->samplerate);

    // Rate 1 with pitch shift 0 leaves the audio unchanged.
    stretching->setRateAndPitchShift(1, 0);

    recorder = new SuperpoweredRecorder(
            tempFilePath,
            decoder->samplerate,
            1,
            2,
            false,
            recorderStopped,
            NULL
    );

    return 0;
}

Some notes to consider:

  1. This is not a duplicate of this question, since the solution in that thread doesn't work for me
  2. I have tried playing with the decoder->samplesPerFrame and numSamples but I can't get a decent output.
  3. If I set the Time Stretching to 1 and Pitch Shift to 0 the sound plays seamlessly.

UPDATE 1:
After some more experimenting with different values for the number of samples, I figured the problem must be the difference between the number of samples that the audio output (DAC MAN) expects and the number that outputBuffers->nextSliceItem actually provides.
Having said that I can think of a way to mitigate this problem and that would be to append the output of outputBuffers->nextSliceItem to a temporary buffer and then when it reaches the threshold, direct it to the audio output.

Hence my second question: Is there a way in C++ to append a buffer to another buffer?

like image 340
2hamed Avatar asked Oct 19 '18 18:10

2hamed


1 Answers

You need to output exactly numberOfFrames frames, the value passed to audioProcessing(). Therefore in outputBuffers->makeSlice you need to ask for numberOfFrames, not outputBuffers->sampleLength (basically you are asking for "any number of frames in outputBuffers", not "numberOfFrames").

Then you convert from float to int, then back to float? It doesn't make sense. You got floating point audio in timeStretchedAudio, which can be immediately processed by your recorder.

After that you forgot to advance the "audio" pointer after converting some floating point samples into it, so every slice overwrites the beginning of the output buffer.

And finally you remove all audio from outputBuffers, while you need to remove only the number of frames you output to "audio".

like image 71
Gabor Szanto Avatar answered Nov 10 '22 02:11

Gabor Szanto