Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Encoding AAC with ffmpeg (c++)

I'm working on video encoding that will be used in a Unity plugin. I have image encoding working, but now I'm on to the audio. So I'm trying to write only the audio into an MP4 file with AAC encoding, and I'm stuck: the resulting file does not contain anything. Also, from what I understand, the AAC encoder in ffmpeg only supports AV_SAMPLE_FMT_FLTP, which is why I use it. Here's my code:

Setup:

int initialize_encoding_audio(const char *filename)
{
    int ret;
    AVCodecID aud_codec_id = AV_CODEC_ID_AAC;
    AVSampleFormat sample_fmt = AV_SAMPLE_FMT_FLTP;

    avcodec_register_all();
    av_register_all();

    aud_codec = avcodec_find_encoder(aud_codec_id);
    avcodec_register(aud_codec);

    if (!aud_codec)
        return COULD_NOT_FIND_AUD_CODEC;

    aud_codec_context = avcodec_alloc_context3(aud_codec);
    if (!aud_codec_context)
        return CONTEXT_CREATION_ERROR;

    aud_codec_context->bit_rate = 192000;
    aud_codec_context->sample_rate = select_sample_rate(aud_codec);
    aud_codec_context->sample_fmt = sample_fmt;
    aud_codec_context->channel_layout = AV_CH_LAYOUT_STEREO;
    aud_codec_context->channels = av_get_channel_layout_nb_channels(aud_codec_context->channel_layout);

    aud_codec_context->codec = aud_codec;
    aud_codec_context->codec_id = aud_codec_id;

    ret = avcodec_open2(aud_codec_context, aud_codec, NULL);

    if (ret < 0)
        return COULD_NOT_OPEN_AUD_CODEC;

    outctx = avformat_alloc_context();
    ret = avformat_alloc_output_context2(&outctx, NULL, "mp4", filename);

    outctx->audio_codec = aud_codec;
    outctx->audio_codec_id = aud_codec_id;

    audio_st = avformat_new_stream(outctx, aud_codec);

    audio_st->codecpar->bit_rate = aud_codec_context->bit_rate;
    audio_st->codecpar->sample_rate = aud_codec_context->sample_rate;
    audio_st->codecpar->channels = aud_codec_context->channels;
    audio_st->codecpar->channel_layout = aud_codec_context->channel_layout;
    audio_st->codecpar->codec_id = aud_codec_id;
    audio_st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
    audio_st->codecpar->format = sample_fmt;
    audio_st->codecpar->frame_size = aud_codec_context->frame_size;
    audio_st->codecpar->block_align = aud_codec_context->block_align;
    audio_st->codecpar->initial_padding = aud_codec_context->initial_padding;

    outctx->streams = new AVStream*[1];
    outctx->streams[0] = audio_st;

    av_dump_format(outctx, 0, filename, 1);

    if (!(outctx->oformat->flags & AVFMT_NOFILE))
    {
        if (avio_open(&outctx->pb, filename, AVIO_FLAG_WRITE) < 0)
            return COULD_NOT_OPEN_FILE;
    }

    ret = avformat_write_header(outctx, NULL);

    aud_frame = av_frame_alloc();
    aud_frame->nb_samples = aud_codec_context->frame_size;
    aud_frame->format = aud_codec_context->sample_fmt;
    aud_frame->channel_layout = aud_codec_context->channel_layout;

    int buffer_size = av_samples_get_buffer_size(NULL, aud_codec_context->channels, aud_codec_context->frame_size,
        aud_codec_context->sample_fmt, 0);

    av_frame_get_buffer(aud_frame, buffer_size / aud_codec_context->channels);

    if (!aud_frame)
        return COULD_NOT_ALLOCATE_FRAME;

    aud_frame_counter = 0;

    return 0;
}

Encoding:

int encode_audio_samples(uint8_t **aud_samples)
{
    int ret;

    int buffer_size = av_samples_get_buffer_size(NULL, aud_codec_context->channels, aud_codec_context->frame_size,
        aud_codec_context->sample_fmt, 0);

    for (size_t i = 0; i < buffer_size / aud_codec_context->channels; i++)
    {
        aud_frame->data[0][i] = aud_samples[0][i];
        aud_frame->data[1][i] = aud_samples[1][i];
    }

    aud_frame->pts = aud_frame_counter++;

    ret = avcodec_send_frame(aud_codec_context, aud_frame);
    if (ret < 0)
        return ERROR_ENCODING_SAMPLES_SEND;

    AVPacket pkt;
    av_init_packet(&pkt);
    pkt.data = NULL;
    pkt.size = 0;

    fflush(stdout);

    while (true)
    {
        ret = avcodec_receive_packet(aud_codec_context, &pkt);
        if (!ret)
        {
            av_packet_rescale_ts(&pkt, aud_codec_context->time_base, audio_st->time_base);

            pkt.stream_index = audio_st->index;
            av_write_frame(outctx, &pkt);
            av_packet_unref(&pkt);
        }
        if (ret == AVERROR(EAGAIN))
            break;
        else if (ret < 0)
            return ERROR_ENCODING_SAMPLES_RECEIVE;
        else
            break;
    }

    return 0;
}

Finish encoding:

int finish_audio_encoding()
{
    AVPacket pkt;
    av_init_packet(&pkt);
    pkt.data = NULL;
    pkt.size = 0;

    fflush(stdout);

    int ret = avcodec_send_frame(aud_codec_context, NULL);
    if (ret < 0)
        return ERROR_ENCODING_FRAME_SEND;

    while (true)
    {
        ret = avcodec_receive_packet(aud_codec_context, &pkt);
        if (!ret)
        {
            if (pkt.pts != AV_NOPTS_VALUE)
                pkt.pts = av_rescale_q(pkt.pts, aud_codec_context->time_base, audio_st->time_base);
            if (pkt.dts != AV_NOPTS_VALUE)
                pkt.dts = av_rescale_q(pkt.dts, aud_codec_context->time_base, audio_st->time_base);

            av_write_frame(outctx, &pkt);
            av_packet_unref(&pkt);
        }
        if (ret == -AVERROR(AVERROR_EOF))
            break;
        else if (ret < 0)
            return ERROR_ENCODING_FRAME_RECEIVE;
    }

    av_write_trailer(outctx);
}

Main:

/**
 * Fill both channel buffers with the same synthetic sine signal.
 *
 * For each of the frame_size samples, sin(*t) is written to the left and
 * right buffers, then the phase *t is advanced by *tincr and *tincr is
 * advanced by *tincr2. The updated *t and *tincr are left in place so the
 * next call continues where this one stopped; *tincr2 is only read.
 *
 * @param left_samples   Output buffer with room for frame_size samples.
 * @param right_samples  Output buffer with room for frame_size samples.
 * @param frame_size     Number of samples to write per channel.
 * @param t              In/out: current phase.
 * @param tincr          In/out: phase step per sample.
 * @param tincr2         In: amount added to *tincr after each sample.
 */
void get_audio_frame(float_t *left_samples, float_t *right_samples, int frame_size, float* t, float* tincr, float* tincr2)
{
    for (int n = 0; n < frame_size; ++n)
    {
        // Held in a float first so both channels receive the same value.
        const float sample = sin(*t);

        left_samples[n] = sample;
        right_samples[n] = sample;

        *t += *tincr;
        *tincr += *tincr2;
    }
}

int main()
{
    int frame_rate = 30;  // this should be like 96000 / 1024 or somthing i guess?
    float t, tincr, tincr2;

    initialize_encoding_audio("audio.mp4");

    int sec = 50;

    float_t** aud_samples;
    int src_samples_linesize;
    int src_nb_samples = 1024;
    int src_channels = 2;

    int ret = av_samples_alloc_array_and_samples((uint8_t***)&aud_samples, &src_samples_linesize, src_channels,
        src_nb_samples, AV_SAMPLE_FMT_FLTP, 0);


    t = 0;
    tincr = 0;
    tincr2 = 0;

    for (size_t i = 0; i < frame_rate * sec; i++)
    {
        get_audio_frame(aud_samples[0], aud_samples[1], src_nb_samples, &t, &tincr, &tincr2);

        encode_audio_samples((uint8_t **)aud_samples);

    }

    finish_audio_encoding();
    //cleanup();

    return 0;
}

I guess the first thing that I would want to make sure I got right is the synthetic sound generation and how I transfer that to the AVFrame. Are my conversions correct? But feel free to point out anything that might be wrong.

Thanks in advance!

Edit: the whole source: http://pastebin.com/jYtmkhek

Edit2: Added initialization of tincr & tincr2

like image 459
Mockarutan Avatar asked Nov 21 '16 12:11

Mockarutan


People also ask

Does FFmpeg support AAC?

FFmpeg supports two AAC-LC encoders ( aac and libfdk_aac ) and one HE-AAC (v1/2) encoder ( libfdk_aac ). The license of libfdk_aac is not compatible with GPL, so the GPL does not permit distribution of binaries containing incompatible code when GPL-licensed code is also included.

How do I encode an AAC file?

For example, open an audio file, select File > Save As, click in the Format field, and select Edit. In the Audio File Format dialog, select AAC (Advanced Audio Coding) as type, click the Encoding field, and select Edit. Specifies the container for the AAC file.

How do I enable encoder in FFmpeg?

When you configure your FFmpeg build, all the supported native encoders are enabled by default. Encoders requiring an external library must be enabled manually via the corresponding "--enable-lib" option. You can list all available encoders using the configure option "--list-encoders".


1 Answer

Unless I'm missing something from the pastebin, you forgot to initialize a few variables. You're using garbage to generate your samples.

float t, tincr, tincr2;
[...]
get_audio_frame(aud_samples[0], aud_samples[1], src_nb_samples, &t, &tincr, &tincr2);

You probably want to start with t=0 and increment by 2 * PI * frequency / sample rate for a sine wave.

Also, avformat_new_stream() already creates the stream and adds it to the format context for you; don't allocate the streams array yourself with new.

Update:

I removed all the c++ stuff to test this. Here's the code that works: pastebin

And here's the resulting file: audio.mp4

ffmpeg -i audio.mp4 -filter_complex "showwaves=s=640x120:mode=line:colors=white" -frames:v 1 wave.jpg

enter image description here

Diff:

1,6d0
< #include "encoder.h"
< #include <algorithm>
< #include <iterator>
< 
< extern "C"
< {
14a9
> #include <math.h>
40,41c35,36
<   SwsContext *sws_ctx;
<   SwrContext *swr_ctx = NULL;
---
> struct SwsContext *sws_ctx;
> struct SwrContext *swr_ctx = NULL;
76,77c71,72
<       AVCodecID aud_codec_id = AV_CODEC_ID_AAC;
<       AVSampleFormat sample_fmt = AV_SAMPLE_FMT_FLTP;
---
>   enum AVCodecID aud_codec_id = AV_CODEC_ID_AAC;
>   enum AVSampleFormat sample_fmt = AV_SAMPLE_FMT_FLTP;
125,126c120,121
<       outctx->streams = new AVStream*[1];
<       outctx->streams[0] = audio_st;
---
>   //outctx->streams = new AVStream*[1];
>   //outctx->streams[0] = audio_st;
182c177
<       while (true)
---
>   while (1)
216c211
<       while (true)
---
>   while (1)
291c286
<       float t, tincr, tincr2;
---
>   float t = 0, tincr = 2 * M_PI * 440.0 / 96000, tincr2 = 0;
317d311
<   }
like image 73
aergistal Avatar answered Oct 30 '22 03:10

aergistal