I would like to stream live video from a webcam: capture it with OpenCV, encode it with the H.264 codec, mux it as FLV, send it to an RTMP server, and play the stream in the browser with flv.js. Basically I have everything working except that I cannot read the stream in flv.js. I can open the stream with `ffplay`,
so I think at least most of the things are set correctly.
My current implementation:
#include <iostream>
#include <vector>
#include <opencv2/highgui.hpp>
#include <opencv2/video.hpp>
extern "C" {
#include <libavformat/avformat.h>
#include <libavcodec/avcodec.h>
#include <libavutil/avutil.h>
#include <libavutil/pixdesc.h>
#include <libavutil/imgutils.h>
#include <libswscale/swscale.h>
}
// Captures frames from an OpenCV camera, converts them to YUV420P, encodes
// them with an H.264 encoder and writes the packets through an FLV muxer to a
// hard-coded RTMP URL.
//
// Parameters:
//   width, height - requested capture size; also used for the encoder, the
//                   scaler and the frame buffer.
//   fps           - used to build the encoder framerate and time_base.
//   camID         - device index handed to cv::VideoCapture.
//
// Each checked failure prints a message to stdout and calls exit(1).
void stream_video(double width, double height, int fps, int camID)
{
av_register_all();
avformat_network_init();
const char *output = "rtmp://localhost/live/stream";
// frame rate as a rational: fps / 1
const AVRational dst_fps = {fps, 1};
int ret;
// initialize video capture device
cv::VideoCapture cam(camID);
if (!cam.isOpened())
{
std::cout << "Failed to open video capture device!" << std::endl;
exit(1);
}
cam.set(cv::CAP_PROP_FRAME_WIDTH, width);
cam.set(cv::CAP_PROP_FRAME_HEIGHT, height);
// allocate cv::Mat with extra bytes (required by AVFrame::data)
// imgbuf provides the storage; image is constructed on top of imgbuf.data()
// with a row stride of width * 3 bytes (3 bytes per BGR pixel).
std::vector<uint8_t> imgbuf(height * width * 3 + 16);
cv::Mat image(height, width, CV_8UC3, imgbuf.data(), width * 3);
// open output format context
AVFormatContext *outctx = nullptr;
ret = avformat_alloc_output_context2(&outctx, nullptr, "flv", output);
if (ret < 0)
{
std::cout << "Could not allocate output format context!" << std::endl;
exit(1);
}
// open output IO context
// (only when the muxer does not set AVFMT_NOFILE)
if (!(outctx->oformat->flags & AVFMT_NOFILE))
{
ret = avio_open2(&outctx->pb, output, AVIO_FLAG_WRITE, nullptr, nullptr);
if (ret < 0)
{
std::cout << "Could not open output IO context!" << std::endl;
exit(1);
}
}
// create new video stream
// NOTE(review): none of the three results below is checked for nullptr.
AVCodec *codec = avcodec_find_encoder(AV_CODEC_ID_H264);
AVStream *strm = avformat_new_stream(outctx, codec);
AVCodecContext *avctx = avcodec_alloc_context3(codec);
avctx->codec_id = AV_CODEC_ID_H264;
avctx->width = width;
avctx->height = height;
avctx->pix_fmt = AV_PIX_FMT_YUV420P;
avctx->framerate = dst_fps;
avctx->time_base = av_inv_q(dst_fps);
// NOTE(review): the stream parameters are copied here, before avcodec_open2()
// below, and are not refreshed afterwards; whatever the encoder fills into
// avctx while opening is therefore not copied into strm->codecpar by this
// function.
ret = avcodec_parameters_from_context(strm->codecpar, avctx);
if (ret < 0)
{
std::cout << "Could not initialize stream codec parameters!" << std::endl;
exit(1);
}
// encoder options handed to avcodec_open2(); opts is not freed in this function
AVDictionary *opts = nullptr;
av_dict_set(&opts, "preset", "superfast", 0);
av_dict_set(&opts, "tune", "zerolatency", 0);
// open video encoder
ret = avcodec_open2(avctx, codec, &opts);
if (ret < 0)
{
std::cout << "Could not open video encoder!" << std::endl;
exit(1);
}
// initialize sample scaler
// (BGR24 -> encoder pixel format, same width and height on both sides)
SwsContext *swsctx = sws_getContext(width, height, AV_PIX_FMT_BGR24, width, height, avctx->pix_fmt, SWS_BICUBIC, nullptr, nullptr, nullptr);
if (!swsctx)
{
std::cout << "Could not initialize sample scaler!" << std::endl;
exit(1);
}
// allocate frame buffer for encoding
// framebuf holds the pixel storage; frame->data / frame->linesize are set up
// to point into it, so framebuf has to outlive every use of frame.
AVFrame *frame = av_frame_alloc();
std::vector<uint8_t> framebuf(av_image_get_buffer_size(avctx->pix_fmt, width, height, 1));
av_image_fill_arrays(frame->data, frame->linesize, framebuf.data(), avctx->pix_fmt, width, height, 1);
frame->width = width;
frame->height = height;
frame->format = static_cast<int>(avctx->pix_fmt);
// write header
ret = avformat_write_header(outctx, nullptr);
if (ret < 0)
{
std::cout << "Could not write header!" << std::endl;
exit(1);
}
// encoding loop
// NOTE(review): end_of_stream is never set to true in this function, so the
// loop only terminates through one of the exit(1) calls and the cleanup code
// after the loop is not reached.
int64_t frame_pts = 0;
unsigned nb_frames = 0;
bool end_of_stream = false;
do
{
nb_frames++;
if (!end_of_stream)
{
// NOTE(review): the captured image is not checked for being empty.
cam >> image;
// convert cv::Mat to AVFrame.
const int stride[] = {static_cast<int>(image.step[0])};
sws_scale(swsctx, &image.data, stride, 0, image.rows, frame->data, frame->linesize);
// pts counts frames: 0, 1, 2, ...
frame->pts = frame_pts++;
}
// encode video frame.
AVPacket pkt = {0};
av_init_packet(&pkt);
ret = avcodec_send_frame(avctx, frame);
if (ret < 0)
{
std::cout << "Error sending frame to codec context!" << std::endl;
exit(1);
}
// any negative return value from the encoder is treated as fatal here
ret = avcodec_receive_packet(avctx, &pkt);
if (ret < 0)
{
std::cout << "Error receiving packet from codec context!" << std::endl;
exit(1);
}
// rescale packet timestamp.
av_packet_rescale_ts(&pkt, avctx->time_base, strm->time_base);
// write packet.
// NOTE(review): the two assignments below overwrite the timestamps that were
// just rescaled, so the packet is handed to the muxer with pts and dts both
// set to AV_NOPTS_VALUE. The muxer's return value is not checked.
pkt.pts = AV_NOPTS_VALUE;
pkt.dts = AV_NOPTS_VALUE;
av_interleaved_write_frame(outctx, &pkt);
std::cout << " Frames: " << nb_frames << '\r' << std::flush;
av_packet_unref(&pkt);
} while (!end_of_stream);
// finalize the output and release resources (swsctx is not released here)
av_write_trailer(outctx);
std::cout << nb_frames << " frames encoded" << std::endl;
av_frame_free(&frame);
avcodec_close(avctx);
avio_close(outctx->pb);
avformat_free_context(outctx);
}
// Entry point: streams camera 1 at 1280x720, 30 fps.
int main()
{
    const double width = 1280;
    const double height = 720;
    const double fps = 30; // converted to int by stream_video()'s signature
    const int camID = 1;

    stream_video(width, height, fps, camID);
    return 0;
}
As I said before, I can successfully open the stream with `ffplay rtmp://localhost/live/stream`
or `ffplay http://localhost:8000/live/stream.flv`,
but I cannot open the stream with the flv.js
player inside the browser; it reports these errors:
flv: Invalid AVCDecoderConfigurationRecord, lack of data!
[FLVDemuxer] > Malformed Nalus near timestamp 0, NaluSize > DataSize!
[FLVDemuxer] > Malformed Nalus near timestamp 1, NaluSize > DataSize!
[FLVDemuxer] > Malformed Nalus near timestamp 2, NaluSize > DataSize!
....
I would really appreciate any help with fixing the stream so that it works properly with flv.js.
If I stream a video with `ffmpeg -re -i input.mp4 -c copy -f flv rtmp://localhost/live/stream`,
I can open the stream in flv.js
without any issues, so that command is roughly what I would like to reproduce in code.
I also put my code in a GitHub repository, in case someone would like to compile the code and check it.
I solved this issue myself.
The main reason this wasn't working as expected is that the AVStream extradata (the SPS and PPS headers) was empty. I needed to manually copy `extradata`
and `extradata_size`
from the AVCodecContext (not sure why this isn't done automatically). After I did this, I saw a picture in flv.js
for the first time. Then I just needed to calculate `frame->pts`
correctly to get the video stream working properly. I am attaching the whole working code below in case anyone else encounters the same issue.
#include <iostream>
#include <vector>
#include <opencv2/highgui.hpp>
#include <opencv2/video.hpp>
extern "C" {
#include <libavformat/avformat.h>
#include <libavcodec/avcodec.h>
#include <libavutil/imgutils.h>
#include <libswscale/swscale.h>
}
// Opens capture device `camID` and requests a `width` x `height` frame size.
// If the device cannot be opened, prints a message and exits with status 1.
cv::VideoCapture get_device(int camID, double width, double height)
{
    cv::VideoCapture capture(camID);
    if (capture.isOpened())
    {
        capture.set(cv::CAP_PROP_FRAME_WIDTH, width);
        capture.set(cv::CAP_PROP_FRAME_HEIGHT, height);
        return capture;
    }

    std::cout << "Failed to open video capture device!" << std::endl;
    exit(1);
}
// Allocates an output format context for the muxer called `format_name` and
// stores it in `fctx`. On failure, prints a message and exits with status 1.
void initialize_avformat_context(AVFormatContext *&fctx, const char *format_name)
{
    const bool allocated =
        avformat_alloc_output_context2(&fctx, nullptr, format_name, nullptr) >= 0;
    if (allocated)
    {
        return;
    }

    std::cout << "Could not allocate output format context!" << std::endl;
    exit(1);
}
// Opens `output` for writing and attaches the resulting IO context to
// `fctx->pb`. Nothing is opened when the muxer sets AVFMT_NOFILE. On failure,
// prints a message and exits with status 1.
void initialize_io_context(AVFormatContext *&fctx, const char *output)
{
    const bool muxer_handles_io = (fctx->oformat->flags & AVFMT_NOFILE) != 0;
    if (muxer_handles_io)
    {
        return;
    }

    const int status = avio_open2(&fctx->pb, output, AVIO_FLAG_WRITE, nullptr, nullptr);
    if (status >= 0)
    {
        return;
    }

    std::cout << "Could not open output IO context!" << std::endl;
    exit(1);
}
// Fills `codec_ctx` with the H.264 video settings used by this program:
// YUV420P, `width` x `height`, `fps` frames per second, GOP size 12.
// When the muxer in `fctx` sets AVFMT_GLOBALHEADER, the encoder is flagged
// with AV_CODEC_FLAG_GLOBAL_HEADER as well.
void set_codec_params(AVFormatContext *&fctx, AVCodecContext *&codec_ctx, double width, double height, int fps)
{
    // Codec identity.
    codec_ctx->codec_type = AVMEDIA_TYPE_VIDEO;
    codec_ctx->codec_id = AV_CODEC_ID_H264;
    codec_ctx->codec_tag = 0;

    // Picture layout.
    codec_ctx->pix_fmt = AV_PIX_FMT_YUV420P;
    codec_ctx->width = width;
    codec_ctx->height = height;
    codec_ctx->gop_size = 12;

    // Timing: framerate is fps/1, time_base is its inverse, 1/fps.
    codec_ctx->framerate = AVRational{fps, 1};
    codec_ctx->time_base = AVRational{1, fps};

    const bool wants_global_header = (fctx->oformat->flags & AVFMT_GLOBALHEADER) != 0;
    if (wants_global_header)
    {
        codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    }
}
// Copies the settings of `codec_ctx` into `stream->codecpar` and then opens
// the encoder `codec` on `codec_ctx` with the x264 options profile=high,
// preset=superfast and tune=zerolatency.
//
// The stream parameters are copied BEFORE the encoder is opened and are not
// refreshed afterwards, so anything the encoder adds to `codec_ctx` while
// opening is not reflected in `stream->codecpar` by this function.
//
// On failure, prints a message and exits with status 1.
void initialize_codec_stream(AVStream *&stream, AVCodecContext *&codec_ctx, AVCodec *&codec)
{
    int ret = avcodec_parameters_from_context(stream->codecpar, codec_ctx);
    if (ret < 0)
    {
        std::cout << "Could not initialize stream codec parameters!" << std::endl;
        exit(1);
    }

    AVDictionary *codec_options = nullptr;
    av_dict_set(&codec_options, "profile", "high", 0);
    av_dict_set(&codec_options, "preset", "superfast", 0);
    av_dict_set(&codec_options, "tune", "zerolatency", 0);

    // open video encoder
    ret = avcodec_open2(codec_ctx, codec, &codec_options);

    // The dictionary stays owned by the caller: after avcodec_open2() it holds
    // the entries the encoder did not consume and must be freed, otherwise it
    // leaks. av_dict_free() accepts a null dictionary.
    av_dict_free(&codec_options);

    if (ret < 0)
    {
        std::cout << "Could not open video encoder!" << std::endl;
        exit(1);
    }
}
// Creates a bicubic swscale context that converts BGR24 pictures to the
// encoder's pixel format without changing the picture size.
// On failure, prints a message and exits with status 1.
SwsContext *initialize_sample_scaler(AVCodecContext *codec_ctx, double width, double height)
{
    const int w = static_cast<int>(width);
    const int h = static_cast<int>(height);

    SwsContext *scaler = sws_getContext(w, h, AV_PIX_FMT_BGR24,
                                        w, h, codec_ctx->pix_fmt,
                                        SWS_BICUBIC, nullptr, nullptr, nullptr);
    if (scaler)
    {
        return scaler;
    }

    std::cout << "Could not initialize sample scaler!" << std::endl;
    exit(1);
}
// Allocates an AVFrame of `width` x `height` in the encoder's pixel format,
// together with the pixel storage it points to.
//
// The storage is allocated with av_frame_get_buffer(), so it is owned by the
// frame itself and released by av_frame_free(). (Previously frame->data
// pointed into a function-local std::vector, which was destroyed on return
// and left the frame with dangling plane pointers.)
//
// Callers must use frame->linesize for the row stride; rows may be padded.
//
// Returns the frame; the caller releases it with av_frame_free().
// On failure, prints a message and exits with status 1.
AVFrame *allocate_frame_buffer(AVCodecContext *codec_ctx, double width, double height)
{
    AVFrame *frame = av_frame_alloc();
    if (!frame)
    {
        std::cout << "Could not allocate frame!" << std::endl;
        exit(1);
    }

    // av_frame_get_buffer() sizes the buffer from these three fields.
    frame->width = static_cast<int>(width);
    frame->height = static_cast<int>(height);
    frame->format = static_cast<int>(codec_ctx->pix_fmt);

    // align = 0 lets FFmpeg choose a suitable alignment for the current CPU.
    const int ret = av_frame_get_buffer(frame, 0);
    if (ret < 0)
    {
        std::cout << "Could not allocate frame buffer!" << std::endl;
        exit(1);
    }
    return frame;
}
// Sends `frame` to the encoder `codec_ctx` and writes every packet the encoder
// has ready to the muxer `fmt_ctx`.
//
// One input frame may produce zero, one or several packets:
//   - AVERROR(EAGAIN) from avcodec_receive_packet() means the encoder needs
//     more input before it can emit anything; that is not an error.
//   - AVERROR_EOF means the encoder has been fully flushed.
// Both simply end the drain loop. Packet timestamps are forwarded to the
// muxer exactly as the encoder produced them (no rescaling is done here).
//
// Any other encoder error, or a muxer write error, prints a message and exits
// with status 1.
void write_frame(AVCodecContext *codec_ctx, AVFormatContext *fmt_ctx, AVFrame *frame)
{
    int ret = avcodec_send_frame(codec_ctx, frame);
    if (ret < 0)
    {
        std::cout << "Error sending frame to codec context!" << std::endl;
        exit(1);
    }

    AVPacket pkt = {0};
    av_init_packet(&pkt);

    for (;;)
    {
        ret = avcodec_receive_packet(codec_ctx, &pkt);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
        {
            break; // nothing (more) to write for now
        }
        if (ret < 0)
        {
            std::cout << "Error receiving packet from codec context!" << std::endl;
            exit(1);
        }

        ret = av_interleaved_write_frame(fmt_ctx, &pkt);
        av_packet_unref(&pkt);
        if (ret < 0)
        {
            // e.g. the connection to the server was lost; continuing would
            // just keep encoding into a dead output.
            std::cout << "Error writing packet to output!" << std::endl;
            exit(1);
        }
    }
}
void stream_video(double width, double height, int fps, int camID)
{
av_register_all();
avformat_network_init();
const char *output = "rtmp://localhost/live/stream";
int ret;
auto cam = get_device(camID, width, height);
std::vector<uint8_t> imgbuf(height * width * 3 + 16);
cv::Mat image(height, width, CV_8UC3, imgbuf.data(), width * 3);
AVFormatContext *ofmt_ctx = nullptr;
AVCodec *out_codec = nullptr;
AVStream *out_stream = nullptr;
AVCodecContext *out_codec_ctx = nullptr;
initialize_avformat_context(ofmt_ctx, "flv");
initialize_io_context(ofmt_ctx, output);
out_codec = avcodec_find_encoder(AV_CODEC_ID_H264);
out_stream = avformat_new_stream(ofmt_ctx, out_codec);
out_codec_ctx = avcodec_alloc_context3(out_codec);
set_codec_params(ofmt_ctx, out_codec_ctx, width, height, fps);
initialize_codec_stream(out_stream, out_codec_ctx, out_codec);
out_stream->codecpar->extradata = out_codec_ctx->extradata;
out_stream->codecpar->extradata_size = out_codec_ctx->extradata_size;
av_dump_format(ofmt_ctx, 0, output, 1);
auto *swsctx = initialize_sample_scaler(out_codec_ctx, width, height);
auto *frame = allocate_frame_buffer(out_codec_ctx, width, height);
int cur_size;
uint8_t *cur_ptr;
ret = avformat_write_header(ofmt_ctx, nullptr);
if (ret < 0)
{
std::cout << "Could not write header!" << std::endl;
exit(1);
}
bool end_of_stream = false;
do
{
cam >> image;
const int stride[] = {static_cast<int>(image.step[0])};
sws_scale(swsctx, &image.data, stride, 0, image.rows, frame->data, frame->linesize);
frame->pts += av_rescale_q(1, out_codec_ctx->time_base, out_stream->time_base);
write_frame(out_codec_ctx, ofmt_ctx, frame);
} while (!end_of_stream);
av_write_trailer(ofmt_ctx);
av_frame_free(&frame);
avcodec_close(out_codec_ctx);
avio_close(ofmt_ctx->pb);
avformat_free_context(ofmt_ctx);
}
// Entry point: streams camera 1 at 1280x720, 25 fps.
int main()
{
    // Uncomment for verbose FFmpeg logging:
    // av_log_set_level(AV_LOG_DEBUG);
    const double width = 1280;
    const double height = 720;
    const int fps = 25;
    const int camID = 1;

    stream_video(width, height, fps, camID);
    return 0;
}
That's it!
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With