I am new to libav and I am writing a video manipulation software which uses opencv as its heart. What I did is briefly as below:
1- read the video packet
2- decode the packet into AVFrame
3- convert the AVFrame to CV Mat
4- manipulate the Mat
5- convert the CV Mat into AVFrame
6- encode the AVFrame into AVPacket
7- write the packet
8- goto 1
I read dranger tutorial in http://dranger.com/ffmpeg/tutorial01.html and I also used decoding_encoding example. I can read the video, extract video frames and convert them to CV Mat. My problem starts from converting from cv Mat to AVFrame and encode it to AVPacket.
Would you please help me with this?
Here is my code :
int main(int argc, char **argv)
{
AVOutputFormat *ofmt = NULL;
AVFormatContext *ifmt_ctx = NULL, *ofmt_ctx = NULL;
AVPacket pkt;
AVCodecContext *pCodecCtx = NULL;
AVCodec *pCodec = NULL;
AVFrame *pFrame = NULL;
AVFrame *pFrameRGB = NULL;
int videoStream=-1;
int audioStream=-1;
int frameFinished;
int numBytes;
uint8_t *buffer = NULL;
struct SwsContext *sws_ctx = NULL;
FrameManipulation *mal_frame;
const char *in_filename, *out_filename;
int ret, i;
if (argc < 3) {
printf("usage: %s input output\n"
"API example program to remux a media file with libavformat and libavcodec.\n"
"The output format is guessed according to the file extension.\n"
"\n", argv[0]);
return 1;
}
in_filename = arg[1];
out_filename = arg[2];
av_register_all();
if ((ret = avformat_open_input(&ifmt_ctx, in_filename, 0, 0)) < 0) {
fprintf(stderr, "Could not open input file '%s'", in_filename);
goto end;
}
if ((ret = avformat_find_stream_info(ifmt_ctx, 0)) < 0) {
fprintf(stderr, "Failed to retrieve input stream information");
goto end;
}
av_dump_format(ifmt_ctx, 0, in_filename, 0);
avformat_alloc_output_context2(&ofmt_ctx, NULL, NULL, out_filename);
if (!ofmt_ctx) {
fprintf(stderr, "Could not create output context\n");
ret = AVERROR_UNKNOWN;
goto end;
}
ofmt = ofmt_ctx->oformat;
for (i = 0; i < ifmt_ctx->nb_streams; i++) {
AVStream *in_stream = ifmt_ctx->streams[i];
AVStream *out_stream = avformat_new_stream(ofmt_ctx, in_stream->codec->codec);
if(ifmt_ctx->streams[i]->codec->codec_type==AVMEDIA_TYPE_VIDEO &&
videoStream < 0) {
videoStream=i;
}
if(ifmt_ctx->streams[i]->codec->codec_type==AVMEDIA_TYPE_AUDIO &&
audioStream < 0) {
audioStream=i;
}
if (!out_stream) {
fprintf(stderr, "Failed allocating output stream\n");
ret = AVERROR_UNKNOWN;
goto end;
}
ret = avcodec_copy_context(out_stream->codec, in_stream->codec);
if (ret < 0) {
fprintf(stderr, "Failed to copy context from input to output stream codec context\n");
goto end;
}
out_stream->codec->codec_tag = 0;
if (ofmt_ctx->oformat->flags & AVFMT_GLOBALHEADER)
out_stream->codec->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
pCodec=avcodec_find_decoder(ifmt_ctx->streams[videoStream]->codec->codec_id);
pCodecCtx = avcodec_alloc_context3(pCodec);
if(avcodec_copy_context(pCodecCtx, ifmt_ctx->streams[videoStream]->codec) != 0) {
fprintf(stderr, "Couldn't copy codec context");
return -1; // Error copying codec context
}
// Open codec
if(avcodec_open2(pCodecCtx, pCodec, NULL)<0)
return -1; // Could not open codec
// Allocate video frame
pFrame=av_frame_alloc();
// Allocate an AVFrame structure
pFrameRGB=av_frame_alloc();
// Determine required buffer size and allocate buffer
numBytes=avpicture_get_size(AV_PIX_FMT_RGB24, ifmt_ctx->streams[videoStream]->codec->width,
ifmt_ctx->streams[videoStream]->codec->height);
buffer=(uint8_t *)av_malloc(numBytes*sizeof(uint8_t));
// Assign appropriate parts of buffer to image planes in pFrameRGB
// Note that pFrameRGB is an AVFrame, but AVFrame is a superset
// of AVPicture
avpicture_fill((AVPicture *)pFrameRGB, buffer, AV_PIX_FMT_BGR24,
ifmt_ctx->streams[videoStream]->codec->width, ifmt_ctx->streams[videoStream]->codec->height);
av_dump_format(ofmt_ctx, 0, out_filename, 1);
if (!(ofmt->flags & AVFMT_NOFILE)) {
ret = avio_open(&ofmt_ctx->pb, out_filename, AVIO_FLAG_WRITE);
if (ret < 0) {
fprintf(stderr, "Could not open output file '%s'", out_filename);
goto end;
}
}
ret = avformat_write_header(ofmt_ctx, NULL);
if (ret < 0) {
fprintf(stderr, "Error occurred when opening output file\n");
goto end;
}
// Assign appropriate parts of buffer to image planes in pFrameRGB
// Note that pFrameRGB is an AVFrame, but AVFrame is a superset
// of AVPicture
avpicture_fill((AVPicture *)pFrameRGB, buffer, AV_PIX_FMT_BGR24,
ifmt_ctx->streams[videoStream]->codec->width,
ifmt_ctx->streams[videoStream]->codec->height);
// initialize SWS context for software scaling
sws_ctx = sws_getContext(
ifmt_ctx->streams[videoStream]->codec->width,
ifmt_ctx->streams[videoStream]->codec->height,
ifmt_ctx->streams[videoStream]->codec->pix_fmt,
ifmt_ctx->streams[videoStream]->codec->width,
ifmt_ctx->streams[videoStream]->codec->height,
AV_PIX_FMT_BGR24,
SWS_BICUBIC,
NULL,
NULL,
NULL
);
// Loop through packets
while (1) {
AVStream *in_stream, *out_stream;
ret = av_read_frame(ifmt_ctx, &pkt);
if(pkt.stream_index==videoStream)
// Decode video frame
avcodec_decode_video2(pCodecCtx, pFrame, &frameFinished, &pkt);
if(frameFinished) {
sws_scale(sws_ctx, (uint8_t const * const *)pFrame->data,
pFrame->linesize, 0, pCodecCtx->height,
pFrameRGB->data, pFrameRGB->linesize);
cv::Mat img= mal_frame->process(
pFrameRGB,pFrame->width,pFrame->height);
/* My problem is Here ------------*/
avpicture_fill((AVPicture*)pFrameRGB,
img.data,
PIX_FMT_BGR24,
outStream->codec->width,
outStream->codec->height);
pFrameRGB->width = ifmt_ctx->streams[videoStream]->codec->width;
pFrameRGB->height = ifmt_ctx->streams[videoStream]->codec->height;
avcodec_encode_video2(ifmt_ctx->streams[videoStream]->codec ,
&pkt , pFrameRGB , &gotPacket);
/*
I get this error
[swscaler @ 0x14b58a0] bad src image pointers
[swscaler @ 0x14b58a0] bad src image pointers
*/
/* My Problem Ends here ---------- */
}
if (ret < 0)
break;
in_stream = ifmt_ctx->streams[pkt.stream_index];
out_stream = ofmt_ctx->streams[pkt.stream_index];
//log_packet(ifmt_ctx, &pkt, "in");
/* copy packet */
pkt.pts = av_rescale_q_rnd(pkt.pts, in_stream->time_base, out_stream->time_base,
AV_ROUND_NEAR_INF);
pkt.dts = av_rescale_q_rnd(pkt.dts, in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF);
pkt.duration = av_rescale_q(pkt.duration, in_stream->time_base, out_stream->time_base);
pkt.pos = -1;
log_packet(ofmt_ctx, &pkt, "out");
ret = av_interleaved_write_frame(ofmt_ctx, &pkt);
if (ret < 0) {
fprintf(stderr, "Error muxing packet\n");
break;
}
av_free_packet(&pkt);
}
av_write_trailer(ofmt_ctx);
end:
avformat_close_input(&ifmt_ctx);
/* close output */
if (ofmt_ctx && !(ofmt->flags & AVFMT_NOFILE))
avio_closep(&ofmt_ctx->pb);
avformat_free_context(ofmt_ctx);
if (ret < 0 && ret != AVERROR_EOF) {
return 1;
}
return 0;
}
When I run this code, I get unknown fatal error in this part:
/* My problem is Here ------------*/
avpicture_fill((AVPicture*)pFrameRGB,
img.data,
PIX_FMT_BGR24,
outStream->codec->width,
outStream->codec->height);
pFrameRGB->width = ifmt_ctx->streams[videoStream]->codec->width;
pFrameRGB->height = ifmt_ctx->streams[videoStream]->codec->height;
avcodec_encode_video2(ifmt_ctx->streams[videoStream]->codec ,
&pkt , pFrameRGB , &gotPacket);
/*
I get this error
[swscaler @ 0x14b58a0] bad src image pointers
[swscaler @ 0x14b58a0] bad src image pointers
*/
/* My Problem Ends here ---------- */
Here is where I want to convert back cv Mat to AVFrame and encode it to AVPacket. I appreciate your help.
After reading some examples, reading source code and some helps people offered, I managed to make the code run. I used transcoding and encoding examples and mixed them up. Here is my code
here are the highlights: 1- libswscale should be used to convert AVFrame with required packet format to be fed into openCV Mat. To do so, we define
struct SwsContext *sws_ctx = NULL;
sws_ctx = sws_getContext(pCodecCtx->width,
pCodecCtx->height,
pCodecCtx->pix_fmt,
pCodecCtx->width,
pCodecCtx->height,
AV_PIX_FMT_BGR24,
SWS_BICUBIC,
NULL,
NULL,
NULL
);
To convert back the opencv Mat to AVFrame, one should again use swscale and translate opencv BGR frame format to YUV. So, I do this:
struct SwsContext *sws_ctx_bgr_yuv = NULL;
sws_ctx_bgr_yuv = sws_getContext(pCodecCtx->width,
pCodecCtx->height,
AV_PIX_FMT_BGR24,
pCodecCtx->width,
pCodecCtx->height,
pCodecCtx->pix_fmt //AV_PIX_FMT_YUV420p
,0,0,NULL,NULL);
And, here is frame reading/decoding/encoding loop:
while (1) {
if ((ret = av_read_frame(ifmt_ctx, &packet)) < 0)
break;
stream_index = packet.stream_index;
type = ifmt_ctx->streams[packet.stream_index]->codec->codec_type;
av_log(NULL, AV_LOG_DEBUG, "Demuxer gave frame of stream_index %u\n",
stream_index);
if (filter_ctx[stream_index].filter_graph) {
av_log(NULL, AV_LOG_DEBUG, "Going to reencode&filter the frame\n");
frame = av_frame_alloc();
if (!frame) {
ret = AVERROR(ENOMEM);
break;
}
av_packet_rescale_ts(&packet,
ifmt_ctx->streams[stream_index]->time_base,
ifmt_ctx->streams[stream_index]->codec->time_base);
dec_func = (type == AVMEDIA_TYPE_VIDEO) ? avcodec_decode_video2 :
avcodec_decode_audio4;
ret = dec_func(ifmt_ctx->streams[stream_index]->codec, frame,
&got_frame, &packet);
if (ret < 0) {
av_frame_free(&frame);
av_log(NULL, AV_LOG_ERROR, "Decoding failed\n");
break;
}
if (got_frame) {
if(stream_index==video_index){
sws_scale(sws_ctx, (uint8_t const * const *)frame->data,
frame->linesize, 0, pCodecCtx->height,
pFrameRGB->data, pFrameRGB->linesize);
/*------------------------------------------------------------------------
/* Frame converts to opencv Mat
/*------------------------------------------------------------------------*/
cv::Mat img(frame->height,frame->width,CV_8UC3,pFrameRGB->data[0]);
img=manipulate_image(img); //this is opencv Mat, do whatever you want, but don't change its dimensions and format
//manipulate_function can be considered as as simple as blurring
const int stride[] = {img.step[0] };
/* opencv Mat converts back to AVFrame */
sws_scale(sws_ctx_bgr_yuv, &img.data, stride, 0, img.rows, frame->data, frame->linesize);
}
frame->pts = av_frame_get_best_effort_timestamp(frame);
/* AVFrame re-encodes to AVPacket and will be sent to encoder */
ret = filter_encode_write_frame(frame, stream_index);
av_frame_free(&frame);
if (ret < 0)
goto end;
} else {
av_frame_free(&frame);
}
} else {
/* remux this frame without reencoding */
av_packet_rescale_ts(&packet,
ifmt_ctx->streams[stream_index]->time_base,
ofmt_ctx->streams[stream_index]->time_base);
ret = av_interleaved_write_frame(ofmt_ctx, &packet);
if (ret < 0)
goto end;
}
av_free_packet(&packet);
}
Here is another way to convert between cv::Mat
and AVframe
using c++ based on some code I found and debugged. Please note its specifically for 8 bit 3 channel images but that can be changed by changing AV_PIX_FMT_BGR24
in both functions.
Hope this helps.
AVFrame cvmat_to_avframe(cv::Mat* frame)
{
AVFrame dst;
cv::Size frameSize = frame->size();
AVCodec *encoder = avcodec_find_encoder(AV_CODEC_ID_RAWVIDEO);
AVFormatContext* outContainer = avformat_alloc_context();
AVStream *outStream = avformat_new_stream(outContainer, encoder);
avcodec_get_context_defaults3(outStream->codec, encoder);
outStream->codec->pix_fmt = AV_PIX_FMT_BGR24;
outStream->codec->width = frame->cols;
outStream->codec->height = frame->rows;
avpicture_fill((AVPicture*)&dst, frame->data, AV_PIX_FMT_BGR24, outStream->codec->width, outStream->codec->height);
dst.width = frameSize.width;
dst.height = frameSize.height;
return dst;
}
cv::Mat avframe_to_cvmat(AVFrame *frame)
{
AVFrame dst;
cv::Mat m;
memset(&dst, 0, sizeof(dst));
int w = frame->width, h = frame->height;
m = cv::Mat(h, w, CV_8UC3);
dst.data[0] = (uint8_t *)m.data;
avpicture_fill( (AVPicture *)&dst, dst.data[0], AV_PIX_FMT_BGR24, w, h);
struct SwsContext *convert_ctx=NULL;
enum AVPixelFormat src_pixfmt = AV_PIX_FMT_BGR24;
enum AVPixelFormat dst_pixfmt = AV_PIX_FMT_BGR24;
convert_ctx = sws_getContext(w, h, src_pixfmt, w, h, dst_pixfmt,
SWS_FAST_BILINEAR, NULL, NULL, NULL);
sws_scale(convert_ctx, frame->data, frame->linesize, 0, h,
dst.data, dst.linesize);
sws_freeContext(convert_ctx);
return m;
}
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With