Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Node JS/Gzip: Image file download ends prematurely with no error

I am creating an application in Node.js to download image files. However, I have been having an issue where, if my download speed is slow or I lose my connection, the image I am downloading is truncated. This would not be so bad if my program threw an error warning me that the image did not finish downloading; however, it does not, and the promise resolves as if the image had downloaded successfully. I do not receive an error when I check the response code or probe the image after downloading.

This is the current code I am using to download images.

const fs = require('fs-extra');
const request = require('request');
var probe = require('probe-image-size');
var progress = require('request-progress');

// Example driver: download one image to a fixed path.
const filename = 'C:/Users/User/Desktop/myimage.jpg';
const req = request(createRequestHeaders('www.linktomyimage.com/image.jpg'));

// Consume the returned promise so that a failed download is reported here
// instead of surfacing as an unhandled promise rejection.
downloadImage(req, filename)
    .then(function (info) {
        console.log('Downloaded', filename, info);
    })
    .catch(function (error) {
        console.error('Download failed:', error);
    });

/**
 * Builds the options object passed to `request()` for a single image URL.
 *
 * Reads `userid`, `phash` and `website` from the enclosing scope; they are
 * not defined in this snippet and must be supplied by the surrounding program.
 *
 * @param {string} url - Address of the image to fetch.
 * @returns {{url: string, method: string, gzip: boolean, headers: {Referer: string, Cookie: string}}}
 *   A GET request description with `gzip` enabled and the Referer/Cookie headers set.
 */
function createRequestHeaders(url) {
    // The cookie is built first so the outer-scope names are read in a fixed order.
    const sessionCookie = `userid=${userid}; phash=${phash};`;
    const headers = {
        Referer: `https://${website}/`,
        Cookie: sessionCookie,
    };
    return { url, method: 'GET', gzip: true, headers };
}

/**
 * Pipes the response of `req` into `filename` and resolves with the image's
 * dimensions once the request has ended.
 *
 * Known limitation: the promise is settled from the request's 'end' event and
 * the number of bytes received is never compared with the Content-Length
 * header, so a download that stops early is not reported by this function.
 *
 * @param {object} req - Request object as returned by `request(options)`.
 * @param {string} filename - Destination path; its parent directory is created if missing.
 * @returns {Promise<{bytes: (string|undefined), width: *, height: *}>}
 *   `bytes` is the raw Content-Length header value; `width`/`height` come from
 *   the probe result. Rejects with `{name: 'StatusCodeError', message}` for a
 *   non-200 status, `{name: 'ImageMissingOrCorrupt'}` when the probe yields
 *   nothing, or with the underlying request/probe error.
 */
function downloadImage(req, filename) {
    // `path` is not among the file's top-level requires, so it is loaded here.
    const path = require('path');

    return new Promise((resolve, reject) => {
        let response = null;
        let bytes;

        // ensureDirSync creates the directory only when it is missing, so no
        // separate existsSync check is needed.
        fs.ensureDirSync(path.dirname(filename));

        progress(req, { delay: 0 }).on('progress', function (state) {
            updateDownloadSpeed(state.speed);
        }).on('end', function () {
            if (response.statusCode !== 200) {
                // This is never triggered when the download stops and the image is truncated
                reject({ 'name': 'StatusCodeError', 'message': response.statusCode });
                return;
            }

            const input = fs.createReadStream(filename);
            probe(input).then(result => {
                input.destroy();
                if (result != null) {
                    resolve({ bytes: bytes, width: result.width, height: result.height });
                } else {
                    // The image size probe does not detect if the download was truncated
                    reject({ 'name': 'ImageMissingOrCorrupt' });
                }
            }).catch((error) => {
                // Release the file handle on the failure path as well.
                input.destroy();
                reject(error);
            });
        }).on('response', function (resp) {
            response = resp;
            bytes = response.headers['content-length'];
        }).on('error', function (error) {
            // This does not detect when an image is truncated either
            reject(error);
        }).pipe(fs.createWriteStream(filename));
    });
}

I asked an earlier version of this question here but after implementing suggestions from the answer my downloader still has the same issues. The code for my downloader has also changed since then.

How can I detect when an image has been truncated so that I can instruct the downloader to reattempt the download?

Edit 1

After reading this I think my issue might have something to do with the website I'm downloading from requiring me to use gzip and the client not being able to tell if the content has actually finished downloading when the server stops the response. However, I am not sure how to test for this happening.

Edit 2

This is what the response header looks like when my downloader successfully connects to an image.

cache-control:"public, max-age=31536000"
connection:"close"
content-disposition:"inline; filename=129.jpg"
content-length:"185756"
content-transfer-encoding:"binary"
content-type:"image/jpeg"
date:"Thu, 05 Sep 2019 00:15:11 GMT"
expires:"Fri, 04 Sep 2020 00:15:11 GMT"
server:"Apache"
like image 518
Settings Menu Hard to Find Avatar asked Aug 31 '19 06:08

Settings Menu Hard to Find


1 Answers

I would recommend checking that the write stream's bytesWritten property equals the value of the content-length header you are receiving.

I have been playing around with your code and I have found that the end event for the request gets fired before the writeStream is closed, so I think there is actually no way for you to check this property at this point. Instead you should validate it on the close event of your writeStream.

Try this sample code and tell us how it goes:

const fs = require('fs-extra');
const request = require('request');
var probe = require('probe-image-size');
var progress = require('request-progress');

// Example driver: download one image to a fixed path.
const filename = 'C:/Users/User/Desktop/myimage.jpg';

// The write stream is opened here, before downloadImage gets a chance to
// create the target directory, so make sure the directory exists first.
fs.ensureDirSync(require('path').dirname(filename));

// Module-level stream: downloadImage reads this variable directly.
const writeSteam = fs.createWriteStream(filename);
const req = request(createRequestHeaders('www.linktomyimage.com/image.jpg'));

// Consume the returned promise so that a failed download is reported here
// instead of surfacing as an unhandled promise rejection.
downloadImage(req, filename)
  .then(function (info) {
    console.log('Downloaded', filename, info);
  })
  .catch(function (error) {
    console.error('Download failed:', error);
  });

/**
 * Assembles the `request()` options for fetching one image.
 *
 * Depends on `userid`, `phash` and `website` being available in the enclosing
 * scope; this snippet does not define them.
 *
 * @param {string} url - Address of the image to fetch.
 * @returns {{url: string, method: string, gzip: boolean, headers: {Referer: string, Cookie: string}}}
 *   Options describing a GET request with `gzip` enabled and Referer/Cookie headers.
 */
function createRequestHeaders(url) {
  const cookie = `userid=${userid}; phash=${phash};`;
  const referer = `https://${website}/`;

  return {
    url,
    method: 'GET',
    gzip: true,
    headers: { Referer: referer, Cookie: cookie },
  };
}

/**
 * Pipes the response of `req` into the module-level `writeSteam` and, once
 * that stream has closed, verifies the download before resolving.
 *
 * Verification happens on the write stream's 'close' event because
 * `bytesWritten` is only final at that point. The byte count is compared with
 * the Content-Length header only when that header is a valid integer and the
 * response carries no Content-Encoding; otherwise the size cannot be checked
 * here and only the status code and the image probe are applied.
 *
 * @param {object} req - Request object as returned by `request(options)`.
 * @param {string} filename - Path of the file `writeSteam` writes to; it is
 *   re-opened for probing and its parent directory is created if missing.
 * @returns {Promise<{bytes: (string|undefined), width: *, height: *}>}
 *   `bytes` is the raw Content-Length header value; `width`/`height` come from
 *   the probe result. Rejects with `{name: 'ImageCorrupt', message}` on a size
 *   mismatch, `{name: 'StatusCodeError', message}` for a non-200 status,
 *   `{name: 'ImageMissingOrCorrupt'}` when the probe yields nothing, or with
 *   the underlying request/probe error.
 */
function downloadImage(req, filename) {
  // `path` is not among the file's top-level requires, so it is loaded here.
  const path = require('path');

  return new Promise((resolve, reject) => {
    let response = null;
    let bytes;                 // raw Content-Length header, passed through to the caller
    let expectedBytes = null;  // numeric size to verify, or null when it cannot be verified
    let requestFailed = false;

    // ensureDirSync creates the directory only when it is missing.
    fs.ensureDirSync(path.dirname(filename));

    // `once` keeps listeners from piling up on the shared stream.
    writeSteam.once('close', () => {
      if (requestFailed) {
        // Already rejected by the 'error' handler, which also closed the stream.
        return;
      }
      if (response === null) {
        reject(new Error('Write stream closed before a response was received'));
        return;
      }
      if (expectedBytes !== null && expectedBytes !== writeSteam.bytesWritten) {
        reject({
          'name': 'ImageCorrupt',
          'message': `Expected ${expectedBytes} bytes but wrote ${writeSteam.bytesWritten}`
        });
        return;
      }
      if (response.statusCode !== 200) {
        reject({ 'name': 'StatusCodeError', 'message': response.statusCode });
        return;
      }

      const input = fs.createReadStream(filename);
      probe(input).then(result => {
        input.destroy();
        if (result != null) {
          resolve({ bytes: bytes, width: result.width, height: result.height });
        } else {
          // The image size probe does not detect if the download was truncated
          reject({ 'name': 'ImageMissingOrCorrupt' });
        }
      }).catch((error) => {
        // Release the file handle on the failure path as well.
        input.destroy();
        reject(error);
      });
    });

    progress(req, { delay: 0 }).on('progress', function (state) {
      updateDownloadSpeed(state.speed);
    }).on('response', function (resp) {
      response = resp;
      bytes = resp.headers['content-length'];

      // The header is a string; convert it so it can be compared with the
      // numeric bytesWritten counter.
      const parsedLength = Number.parseInt(bytes, 10);
      const encoding = resp.headers['content-encoding'];
      // With a Content-Encoding the header describes the encoded body, which
      // need not match the number of bytes written after decoding.
      const isEncoded = encoding != null && String(encoding).toLowerCase() !== 'identity';
      expectedBytes = (!isEncoded && Number.isInteger(parsedLength)) ? parsedLength : null;
    }).on('error', function (error) {
      requestFailed = true;
      // Release the file handle; the partially written file stays on disk.
      writeSteam.destroy();
      reject(error);
    }).pipe(writeSteam);
  });
}
like image 128
Marcos Luis Delgado Avatar answered Oct 31 '22 08:10

Marcos Luis Delgado