Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

C++ lzma compression and decompression of large stream by parts

Tags:

c++

qt

lzma

I need to do LZMA compression and decompression on the fly. I'm receiving a large file via QNetworkAccessManager in Qt, and I need to decompress it while the data stream is still downloading.

When I receive a part of the data stream, I need to decompress it, append the result to a file, and then free any memory used during the process. What is the best way to do this?

Right now I'm trying xz-utils (liblzma) through its pure C API; maybe someone can suggest a better way?

The following code is based on this example:

UPD2:

/**
 * Allocation callback for liblzma (assigned to lzma_allocator::alloc).
 *
 * Has calloc()-style parameters: the request is for nmemb elements of
 * size bytes each, i.e. nmemb * size bytes in total. liblzma documents that
 * it currently always passes nmemb == 1, but the full product is honoured
 * here so the callback stays correct for any caller.
 *
 * @param opaque  user pointer from lzma_allocator::opaque (unused)
 * @param nmemb   number of elements
 * @param size    size of one element in bytes
 * @return pointer to the new block (release it with delete[] on a char*),
 *         or NULL if the request overflows size_t or memory is exhausted
 */
extern "C" void *lz_alloc(void *opaque, size_t nmemb, size_t size)
{
    (void)opaque;

    /* Refuse requests whose byte count cannot be represented in size_t. */
    if (size != 0 && nmemb > static_cast<size_t>(-1) / size)
        return NULL;

    /* A C callback must never let a C++ exception escape into the C library. */
    try {
        return new char[nmemb * size];
    }
    catch (const std::bad_alloc &) {
        return NULL;
    }
}

/**
 * Deallocation callback for liblzma (assigned to lzma_allocator::free).
 *
 * Releases the block with delete[] after converting it to a char pointer.
 *
 * @param opaque  user pointer from lzma_allocator::opaque (unused)
 * @param ptr     block to release; may be NULL
 */
extern "C" void lz_free(void *opaque, void *ptr)
{
    char *const block = (char *)ptr;
    delete[] block;
}


/**
 * Compress a complete buffer into one .xz stream in a single call.
 *
 * Uses preset 9 with LZMA_PRESET_EXTREME and a CRC64 integrity check, and
 * routes liblzma's allocations through lz_alloc / lz_free. OUT_BUF_MAX must
 * be defined; it sizes the on-stack scratch buffer the encoder writes into.
 *
 * @param data  bytes to compress (may be empty)
 * @return the compressed stream, or an empty QByteArray if the encoder
 *         could not be initialised or reported an error while encoding
 */
QByteArray lzCompress(QByteArray data)
{
    lzma_stream strm = LZMA_STREAM_INIT;

    /* Custom allocator; opaque is passed back to the callbacks, which
     * ignore it, but it must not be left uninitialised. */
    lzma_allocator al;
    al.alloc = lz_alloc;
    al.free = lz_free;
    al.opaque = NULL;
    strm.allocator = &al;

    if (lzma_easy_encoder(&strm, 9 | LZMA_PRESET_EXTREME, LZMA_CHECK_CRC64) != LZMA_OK) {
        return QByteArray();
    }

    /* constData() reads the shared buffer in place; data() would detach
     * and deep-copy the whole input first. */
    strm.next_in = reinterpret_cast<const uint8_t *>(data.constData());
    strm.avail_in = static_cast<size_t>(data.size());

    uint8_t out_buf[OUT_BUF_MAX];
    QByteArray arr;
    lzma_ret ret_xz;

    /* With LZMA_FINISH the encoder returns LZMA_OK while more output is
     * pending and LZMA_STREAM_END once the stream is complete. */
    do {
        strm.next_out = out_buf;
        strm.avail_out = sizeof(out_buf);
        ret_xz = lzma_code(&strm, LZMA_FINISH);

        const size_t produced = sizeof(out_buf) - strm.avail_out;
        arr.append(reinterpret_cast<const char *>(out_buf), static_cast<int>(produced));
    } while (ret_xz == LZMA_OK);

    lzma_end(&strm);

    /* Anything other than a clean end of stream means the output is unusable. */
    if (ret_xz != LZMA_STREAM_END) {
        return QByteArray();
    }
    return arr;
}

I slept for a few hours and am thinking more clearly now; I fixed my broken code and updated it (it now behaves just like qCompress).

UPD3:

Decompression code (qUncompress-like behavior):

/**
 * Decompress a complete .xz buffer in a single call.
 *
 * The decoder is created with LZMA_TELL_UNSUPPORTED_CHECK | LZMA_CONCATENATED
 * and no memory limit. OUT_BUF_MAX must be defined; it sizes the on-stack
 * scratch buffer the decoder writes into.
 *
 * @param data  compressed bytes
 * @return the decompressed data, or an empty QByteArray if the decoder could
 *         not be initialised or the input is corrupt, truncated or otherwise
 *         rejected by liblzma
 */
QByteArray lzUncompress(QByteArray data)
{
    lzma_stream strm = LZMA_STREAM_INIT;
    const uint32_t flags = LZMA_TELL_UNSUPPORTED_CHECK | LZMA_CONCATENATED;
    const uint64_t memory_limit = UINT64_MAX; /* no memory limit */

    if (lzma_stream_decoder(&strm, memory_limit, flags) != LZMA_OK) {
        return QByteArray();
    }

    /* constData() reads the shared buffer in place; data() would detach
     * and deep-copy the whole input first. */
    strm.next_in = reinterpret_cast<const uint8_t *>(data.constData());
    strm.avail_in = static_cast<size_t>(data.size());

    uint8_t out_buf[OUT_BUF_MAX];
    QByteArray arr;
    lzma_ret ret_xz;

    /* With LZMA_FINISH the decoder returns LZMA_OK while more output is
     * pending and LZMA_STREAM_END once all input has been decoded. */
    do {
        strm.next_out = out_buf;
        strm.avail_out = sizeof(out_buf);
        ret_xz = lzma_code(&strm, LZMA_FINISH);

        const size_t produced = sizeof(out_buf) - strm.avail_out;
        arr.append(reinterpret_cast<const char *>(out_buf), static_cast<int>(produced));
    } while (ret_xz == LZMA_OK);

    lzma_end(&strm);

    /* Do not hand back partial output from a damaged or truncated stream. */
    if (ret_xz != LZMA_STREAM_END) {
        return QByteArray();
    }
    return arr;
}

UPD4:

A basic stream decompression class. The following code simply decompresses an xz stream downloaded from an HTTP server on the fly, which is exactly what I need:

/**
 * Decodes an .xz stream with liblzma while it is being downloaded through a
 * QNetworkReply.
 *
 * On successful construction the file at the target path is renamed to
 * "<path>.tmp", the reply's downloadProgress signal is connected to
 * handle_new_data(), and the access manager's finished signal is connected to
 * compressed_file_request_finished(). Both slots are defined out of line.
 */
class lz_stream_decompressor : public QObject
{
    Q_OBJECT
public:
    /**
     * @param r    reply delivering the compressed data
     * @param q    access manager whose finished(QNetworkReply*) signal is observed
     * @param str  path of the target file
     * @param sz   initial value of the size counter (stored as a copy)
     */
    lz_stream_decompressor(QNetworkReply *r, QNetworkAccessManager *q, const QString &str, unsigned long sz)
        : repl(r),
          qnm(q),
          flags(LZMA_TELL_UNSUPPORTED_CHECK | LZMA_CONCATENATED),
          memory_limit(UINT64_MAX),
          state(0),
          in_buf(NULL),
          in_len(0),
          out_len(0),
          ret_xz(LZMA_OK),
          path(str),
          total_upd_size(sz)
    {
        /* Copy the library's own initialiser rather than assigning each
         * field by hand, so any field liblzma defines is covered. */
        const lzma_stream fresh = LZMA_STREAM_INIT;
        strm = fresh;

        ret_xz = lzma_stream_decoder(&strm, memory_limit, flags);
        if (ret_xz != LZMA_OK)
        {
            state = -1;
            repl->abort();
        }
        else
        {
            connect(repl, SIGNAL(downloadProgress(qint64,qint64)), SLOT(handle_new_data(qint64,qint64)));
            connect(q, SIGNAL(finished(QNetworkReply*)), SLOT(compressed_file_request_finished(QNetworkReply*)));
            /* Move the current file aside under "<path>.tmp". */
            QFile(path).rename(path + ".tmp");
        }
    }

    /** Releases the decoder. The reply is not deleted here. */
    ~lz_stream_decompressor()
    {
        lzma_end(&strm);
    }

    /** @return 0 after successful construction, -1 if the decoder could not be initialised. */
    short get_state() const
    {
        return state;
    }

    /** @return current value of the size counter that the slots decrement as data is read. */
    unsigned long get_total_upd_size() const
    {
        return total_upd_size;
    }

signals:
    void finished();

public slots:
    void handle_new_data(qint64 bytesReceived, qint64 bytesTotal);
    void compressed_file_request_finished(QNetworkReply*);

private:
    QNetworkReply *repl;
    QNetworkAccessManager *qnm;
    lzma_stream strm;
    const uint32_t flags;
    const uint64_t memory_limit; /* no memory limit */
    short state;
    byte *in_buf;
    byte out_buf [OUT_BUF_MAX];
    size_t in_len;  /* length of useful data in in_buf */
    size_t out_len; /* length of useful data in out_buf */
    lzma_ret ret_xz;
    QString path;
    /* Held by value: the constructor receives sz by value, so a reference
     * member bound to it would dangle once the constructor returns. */
    unsigned long total_upd_size;
};

and the implementation:

/**
 * Slot: decodes whatever compressed bytes the reply currently holds and
 * appends the decoded output to the target file.
 *
 * If the reply already reports an error, the target file is removed and the
 * ".tmp" file is renamed back to the target path. If the target file cannot
 * be opened or written, or liblzma rejects the data, state is set to -1 and
 * the reply is aborted.
 *
 * @param bytesReceived  unused
 * @param bytesTotal     unused
 */
void lz_stream_decompressor::handle_new_data(qint64 bytesReceived, qint64 bytesTotal)
{
    (void)bytesReceived;
    (void)bytesTotal;

    if (repl->error() != QNetworkReply::NoError)
    {//TODO: handle error here
        QFile(path).remove();
        QFile(path + ".tmp").rename(path);
        return;
    }

    total_upd_size -= repl->bytesAvailable();
    QByteArray data = repl->readAll();

    /* Nothing new to decode. Repeated lzma_code() calls that make no
     * progress end in LZMA_BUF_ERROR, so skip the call entirely. */
    if (data.isEmpty())
        return;

    in_len = data.size();
    in_buf = (byte*)data.data();
    strm.next_in = in_buf;
    strm.avail_in = in_len;

    /* Open once per batch instead of once per output chunk. */
    QFile file(path);
    bool ok = file.open(QIODevice::WriteOnly | QIODevice::Append);

    if (ok)
    {
        do {
            strm.next_out = out_buf;
            strm.avail_out = OUT_BUF_MAX;
            ret_xz = lzma_code(&strm, LZMA_RUN);
            out_len = OUT_BUF_MAX - strm.avail_out;

            if (out_len > 0 &&
                file.write((const char*)out_buf, (qint64)out_len) != (qint64)out_len)
            {
                ok = false;
                break;
            }
            /* A completely filled output buffer means more decoded data may be pending. */
        } while (ret_xz == LZMA_OK && strm.avail_out == 0);
        file.close();

        /* LZMA_BUF_ERROR is documented as non-fatal; every other result
         * besides LZMA_OK means the stream cannot be decoded. */
        if (ret_xz != LZMA_OK && ret_xz != LZMA_BUF_ERROR)
            ok = false;
    }

    if (!ok)
    {
        state = -1;
        repl->abort();
    }
}

void lz_stream_decompressor::compressed_file_request_finished(QNetworkReply* repl)
{
if(repl->error() != QNetworkReply::NoError)
{//TODO: handle error here
    QFile(path).remove();
    QFile(path + ".tmp").rename(path);
    emit finished();
    return;
}
total_upd_size -= repl->bytesAvailable();
QByteArray data = repl->readAll();
in_len = data.size();
in_buf = (byte*)data.data();
strm.next_in = in_buf;
strm.avail_in = in_len;

do {
    strm.next_out = out_buf;
    strm.avail_out = OUT_BUF_MAX;
    ret_xz = lzma_code (&strm, LZMA_FINISH);
    out_len = OUT_BUF_MAX - strm.avail_out;
    QFile file(path);
    if(file.open(QIODevice::WriteOnly | QIODevice::Append))
    {
        file.write(QByteArray((char*)out_buf, (int)out_len));
        file.close();
    }
    out_buf[0] = 0;
} while (strm.avail_out == 0);
repl->deleteLater();
QFile(path + ".tmp").remove();
emit finished();
}

All of this is based on the example from the first link; you need to replace the commented-out parts of that example with your own code to do something with the uncompressed data.

I would like to hear any suggestions about this code.

You also need to connect the "compressed_file_request_finished" slot to the finished signal of QNetworkAccessManager so that the remaining data gets decompressed.

UPD5:

Fixed lzCompress and lzUncompress; they seem to work fine now. I'm not sure about using LZMA_FULL_FLUSH in handle_new_data: from what I have read it is what I need, but I'm still not certain. I'm now adapting the existing code to use it...

UPD6:

You also need something like this:

/* Read/write buffer sizes, in bytes (409600 = 400 KiB).
 * OUT_BUF_MAX is the element count of the out_buf byte arrays that
 * lzma_code() writes into, so it must be a compile-time constant.
 * NOTE(review): IN_BUF_MAX is not referenced by the code shown with these
 * definitions - confirm whether it is still needed. */
#define IN_BUF_MAX  409600
#define OUT_BUF_MAX 409600
/* Compression preset, analogous to xz CLI options: -0 to -9.
 * NOTE(review): lzCompress as shown passes a literal preset
 * (9 | LZMA_PRESET_EXTREME) instead of this macro - confirm intended use. */
#define COMPRESSION_LEVEL 7

/* Boolean setting, analogous to xz CLI option: -e.
 * NOTE(review): not referenced by the code shown with these definitions. */
#define COMPRESSION_EXTREME true

somewhere in scope for this code to work.

UPD7:

Updated the code; everything is tested and working. I have found that liblzma is not completely thread-safe: I tried to compress a list of files in multiple threads, and it crashed very often.

like image 808
sss123next Avatar asked Jun 26 '11 21:06

sss123next


People also ask

What is LZMA used for?

The Lempel–Ziv–Markov chain algorithm (LZMA) is an algorithm used to perform lossless data compression. It has been under development since either 1996 or 1998 by Igor Pavlov and was first used in the 7z format of the 7-Zip archiver.

What is LZMA package?

lzma: LZMA/XZ compression and decompression This package provides a pure interface for compressing and decompressing LZMA (Lempel–Ziv–Markov chain algorithm) streams of data represented as lazy ByteString s. A monadic incremental interface is provided as well. This package relies on the liblzma C library.

What is LZMA module in Python?

The Lempel–Ziv–Markov chain algorithm(LZMA) performs lossless data compression using a dictionary compression scheme featuring a higher compression ratio than other compression algorithms. Python's lzma module consists of classes and convenience functions for compression and decompression of data with LZMA algorithm.

What is LZMA SDK?

The LZMA SDK provides the documentation, samples, header files, libraries, and tools you need to develop applications that use LZMA compression.


1 Answers

On this page, you will find the lzma SDK which provides source codes in different languages and some binaries: http://www.7-zip.org/sdk.html

You have two solutions:

  • Use the C++ source code to decompress the incoming flow
  • Use the decoder binaries as an external tool in your app
like image 174
Patrice Bernassola Avatar answered Oct 04 '22 15:10

Patrice Bernassola