I am trying to construct a JPEG image from the RTSP MJPEG stream of an IP camera. I am using libcurl to fetch the RTSP data, and I am following the RFC "RTP Payload Format for JPEG-compressed Video" (RFC 2435) to parse it.
I have been able to assemble JPEG images successfully when the packets do not carry the quantization tables. However, with a Q value of 128, where the quantization tables are carried in the packets, my algorithm does not assemble the JPEG image correctly.
I know there are libraries like ffmpeg and gstreamer that can perform this decoding but I can't use them because of some other issues. I have also looked at other posts and I think I am decoding the stream properly. But after JPEG decompression the image looks like this. The background image seems correct but with incorrect horizontal bands scattered over the picture.
This is the code snippet I am using:
// Reassembles one JPEG frame from RTP/JPEG packets (RFC 2435) stored back to
// back in the byte vector `f`, then writes the frame to "image.jpeg".
//
// Each pass handles the packet that begins at f[startOfRTPPacketIterator]:
//   0      - interleaved-frame marker (original note: "LibCurl header")
//   1      - channel identifier
//   2, 3   - packet length (big endian)
//   4..15  - RTP header (version/padding, marker + payload type, sequence,
//            timestamp, SSRC)
//   16..23 - main JPEG header (type-specific, fragment offset, type, Q,
//            width / 8, height / 8)
//   then, only when present: restart marker header (4 bytes, type 64..127)
//   and quantization table header + table data (first fragment, Q >= 128).
//
// The loop stops after the packet whose RTP marker bit is set, or early when
// the buffer ends in the middle of a packet.
//
// NOTE(review): f, jpegPayload, lqt, cqt, restart_interval, packet_length,
// index, the is*/firstPacketFound flags, startOfStreamMarker and MakeHeaders
// are declared outside this snippet; their types are assumed from how they are
// used here.
while (!firstPacketFound) {
    std::cout << "-----------RTP Packet: " << index++ << " starting at " << startOfRTPPacketIterator << "---------" << std::endl;
    isFirstPacket = false;
    isLastPacket = false;

    // Interleaved header (4) + RTP header (12) + main JPEG header (8) must all
    // be inside the buffer before any of the fixed offsets below are read.
    if (startOfRTPPacketIterator < 0 ||
        f.size() < static_cast<std::size_t>(startOfRTPPacketIterator) + 24) {
        std::cout << "Truncated RTP packet" << std::endl;
        break;
    }
    const std::size_t bytesAvailable = f.size() - static_cast<std::size_t>(startOfRTPPacketIterator);

    // 2, 3 - packet length
    uint16_t packet_length_msb = f[startOfRTPPacketIterator + 2];
    uint16_t packet_length_lsb = f[startOfRTPPacketIterator + 3];
    packet_length = packet_length_msb << 8 | packet_length_lsb;
    std::cout << "Packet Size: " << packet_length << std::endl;

    // 4 - RTP version + padding (generally 0x80), not needed here
    // 5 - marker bit (top bit) + payload type (26 for JPEG)
    uint8_t payloadandmarkerbit = f[startOfRTPPacketIterator + 5];
    uint8_t markerbit = (payloadandmarkerbit >> 7);
    uint8_t payload = payloadandmarkerbit & 0x7F;
    std::cout << "Marker byte : " << int(payloadandmarkerbit) << std::endl;
    if (payload != 26) {
        // Only reported; the packet is still processed as before.
        std::cout << "NOT JPEG packet" << std::endl;
    }

    // 6, 7 - sequence number
    uint16_t sequenceNumber_m = f[startOfRTPPacketIterator + 6];
    uint16_t sequenceNumber_l = f[startOfRTPPacketIterator + 7];
    uint16_t sequenceNumber = (sequenceNumber_m << 8) | sequenceNumber_l;
    std::cout << "Sequence Number : " << sequenceNumber << std::endl;

    // 8..11 - timestamp, 12..15 - SSRC (SSRC is not used)
    uint32_t timestamp1 = f[startOfRTPPacketIterator + 8];
    uint32_t timestamp2 = f[startOfRTPPacketIterator + 9];
    uint32_t timestamp3 = f[startOfRTPPacketIterator + 10];
    uint32_t timestamp4 = f[startOfRTPPacketIterator + 11];
    uint32_t timestamp = (timestamp1 << 24) | (timestamp2 << 16) | (timestamp3 << 8) | timestamp4;
    std::cout << "Timestamp : " << timestamp << std::endl;

    //-------------------- Main JPEG header ----------------------/
    // 16 - type specific, 17..19 - fragment offset, 20 - type, 21 - Q,
    // 22 - width / 8, 23 - height / 8
    uint32_t framgent_offset_1 = f[startOfRTPPacketIterator + 17];
    uint32_t framgent_offset_2 = f[startOfRTPPacketIterator + 18];
    uint32_t framgent_offset_3 = f[startOfRTPPacketIterator + 19];
    uint32_t framgent_offset = ((framgent_offset_1 << 16) | (framgent_offset_2 << 8) | (framgent_offset_3));
    if (framgent_offset == 0) {
        // A fragment offset of zero marks the first packet of a frame.
        isFirstPacket = true;
    }
    std::cout << "Fragment Offset: " << framgent_offset << std::endl;
    uint8_t type_specific = f[startOfRTPPacketIterator + 16];
    std::cout << "type_specific: " << int(type_specific) << std::endl;
    uint8_t type = f[startOfRTPPacketIterator + 20];
    std::cout << "type : " << int(type) << std::endl;
    uint8_t Q = f[startOfRTPPacketIterator + 21];
    std::cout << "Q: " << int(Q) << std::endl;
    uint8_t width = f[startOfRTPPacketIterator + 22];
    std::cout << "width : " << int(width * 8) << std::endl;
    uint8_t height = f[startOfRTPPacketIterator + 23];
    std::cout << "height : " << int(height * 8) << std::endl;

    // Offset (from the packet start) of the next unread header byte. Every
    // optional header found below advances it, so it ends up at the first byte
    // of JPEG scan data instead of relying on a fixed 28 / 32+128.
    int indexOfPayload = 24;

    // Types 64..127: a 4-byte restart marker header follows the main header.
    if (type > 63) {
        if (bytesAvailable < static_cast<std::size_t>(indexOfPayload) + 4) {
            std::cout << "Truncated RTP packet" << std::endl;
            break;
        }
        //------------------ Restart marker header ------------------/
        // +0, +1 - restart interval
        // +2, +3 - F (1 bit) + L (1 bit) + restart count (14 bits)
        uint16_t restart_interval_m = uint16_t(f[startOfRTPPacketIterator + indexOfPayload]);
        uint16_t restart_interval_l = uint16_t(f[startOfRTPPacketIterator + indexOfPayload + 1]);
        restart_interval = (restart_interval_m << 8) | restart_interval_l;
        uint16_t restart_count_m = uint16_t(f[startOfRTPPacketIterator + indexOfPayload + 2]);
        uint16_t restart_count_l = uint16_t(f[startOfRTPPacketIterator + indexOfPayload + 3]);
        // The count is the low 14 bits; 0x3F would drop eight of them.
        uint16_t restart_count = ((restart_count_m << 8) | restart_count_l) & 0x3FFF;
        uint8_t F = restart_count_m >> 7;
        uint8_t L = (restart_count_m & 0x40) >> 6;
        std::cout << "restart_interval : " << restart_interval << std::endl;
        std::cout << "restart_count : " << restart_count << std::endl;
        std::cout << "F : " << int(F) << std::endl;
        std::cout << "L : " << int(L) << std::endl;
        indexOfPayload += 4;
    }

    // Q >= 128: the first packet of the frame carries a quantization table
    // header followed by the table data. Later fragments do not have it, so it
    // is only parsed when the fragment offset is zero.
    if (isFirstPacket && Q > 127) {
        if (bytesAvailable < static_cast<std::size_t>(indexOfPayload) + 4) {
            std::cout << "Truncated RTP packet" << std::endl;
            break;
        }
        //-------------- Quantization table header -----------------------/
        // +0 - MBZ, +1 - precision, +2, +3 - length of the table data
        uint8_t MBZ = f[startOfRTPPacketIterator + indexOfPayload];
        uint8_t precision = f[startOfRTPPacketIterator + indexOfPayload + 1];
        uint16_t qauntization_data_length = uint16_t(f[startOfRTPPacketIterator + indexOfPayload + 2]) << 8 | uint16_t(f[startOfRTPPacketIterator + indexOfPayload + 3]);
        std::cout << "MBZ : " << int(MBZ) << std::endl;
        std::cout << "precision : " << int(precision) << std::endl;
        std::cout << "qauntization_data_length : " << qauntization_data_length << std::endl;
        indexOfPayload += 4;

        if (bytesAvailable < static_cast<std::size_t>(indexOfPayload) + qauntization_data_length) {
            std::cout << "Truncated RTP packet" << std::endl;
            break;
        }
        // Two contiguous 64-byte tables: luma first, chroma directly after it
        // (the chroma table starts 64 bytes in, not 65).
        // NOTE(review): assumes 8-bit tables (precision == 0) - confirm.
        // When fewer than 128 bytes are sent, lqt/cqt keep their old contents.
        if (qauntization_data_length >= 128) {
            memcpy(lqt, &f[startOfRTPPacketIterator + indexOfPayload], 64);
            memcpy(cqt, &f[startOfRTPPacketIterator + indexOfPayload + 64], 64);
        }
        indexOfPayload += qauntization_data_length;
    }

    if (isFirstPacket) {
        std::cout << "First packet found" << std::endl;
        // NOTE(review): for Q < 128 the tables are not in the packet and would
        // have to be computed from Q; that call is left disabled as before.
        // MakeTables(Q,lqt,cqt);

        // A vector owns the scratch buffer, so it is always released correctly
        // (the previous new[] was paired with a scalar delete).
        std::vector<unsigned char> headerBuffer(3000);
        int sizeOfHeaders = MakeHeaders(&headerBuffer[0], type, width, height, lqt, cqt, restart_interval);
        std::cout << "Size of headers:" << sizeOfHeaders << std::endl;
        // NOTE(review): the "+ 1" is kept from the original. If MakeHeaders
        // returns the number of bytes written, this appends one extra byte -
        // confirm against its definition.
        jpegPayload.insert(jpegPayload.end(), headerBuffer.begin(), headerBuffer.begin() + sizeOfHeaders + 1);
    }

    int laststartOfRTPPacketIterator = startOfRTPPacketIterator;

    // Locate the start of the next packet by searching for the two-byte stream
    // marker, beginning packet_length bytes into the current packet. The start
    // position is clamped so the iterator never passes f.end().
    std::size_t searchFrom = static_cast<std::size_t>(startOfRTPPacketIterator) + static_cast<std::size_t>(packet_length);
    if (searchFrom > f.size()) {
        searchFrom = f.size();
    }
    std::vector<uint8_t>::iterator startOfRTPPacketIterator2;
    // startOfRTPPacketIterator2 = std::find(f.begin() + startOfRTPPacketIterator +packet_length ,f.end(),0x24);
    startOfRTPPacketIterator2 = std::search(f.begin() + searchFrom, f.end(), startOfStreamMarker.begin(), startOfStreamMarker.begin() + 2);
    startOfRTPPacketIterator = (int)std::distance(f.begin(), startOfRTPPacketIterator2);

    std::cout << "Adding payload data" << std::endl;
    std::cout << "Starting at " << (laststartOfRTPPacketIterator + indexOfPayload) << std::endl;
    std::cout << "Ending at " << startOfRTPPacketIterator << std::endl;
    std::cout << "Size of vector before : " << jpegPayload.size() << std::endl;
    // jpegPayload.reserve(framgent_offset);
    // Append everything between the end of the headers and the next packet.
    // Iterators are used so that an end position equal to f.size() is valid.
    if (laststartOfRTPPacketIterator + indexOfPayload <= startOfRTPPacketIterator) {
        jpegPayload.insert(jpegPayload.end(),
                           f.begin() + (laststartOfRTPPacketIterator + indexOfPayload),
                           f.begin() + startOfRTPPacketIterator);
    }
    std::cout << "Size of vector after : " << jpegPayload.size() << std::endl;

    // The RTP marker bit flags the last packet of the frame: write the image.
    if (markerbit) {
        firstPacketFound = true;
        std::cout << "Last packet found" << std::endl;
        isLastPacket = true;
        FILE* image = fopen("image.jpeg", "wb");
        if (image) {
            if (!jpegPayload.empty()) {
                fwrite(&jpegPayload[0], sizeof(char), jpegPayload.size(), image);
            }
            fclose(image);
        } else {
            std::cout << "Could not open image.jpeg for writing" << std::endl;
        }
    }
    // NOTE(review): the closing brace of the while loop is not part of this
    // snippet.
Any ideas what could be going wrong?
Thanks!
UPDATE: I also noticed that the application that works uses UDP streaming, whereas I set the transport to TCP: const char *transport = "RTP/AVP/TCP;unicast;interleaved=0-1";
Could that make a difference? Do some cameras support only UDP and not TCP?
There were extra 0x0d bytes in the data I was getting from curl, and every one of them came immediately before a 0x0a. Once I removed each 0x0d that preceded a 0x0a, my algorithm worked fine. I found this by comparing the data delivered by curl with a Wireshark capture of the same stream. It looks like a bug in curl that inserts the extra characters.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With