I'm coding a function that runs API calls and requests JSON from a huge database in sequence via offsets. The JSON response is parsed and then the subsequent data within is uploaded to our Cloud Firestore server.
Nodejs (Node 6.11.3) & Latest Firebase Admin SDK
The information is parsed as expected, and prints to the console perfectly. When the data attempts to upload to our Firestore database however, the console is spammed with the error message:
Auth error:Error: socket hang up
(node:846) UnhandledPromiseRejectionWarning: Unhandled promise rejection (rejection id: -Number-): Error: Getting metadata from plugin failed with error: socket hang up
and occasionally:
Auth error:Error: read ECONNRESET
The forEach function collects the items from the downloaded JSON and processes the data before uploading to the Firestore database. Each JSON has up to 1000 items of data (1000 documents worth) to pass through the forEach function. I suspect this might be a problem if the function repeats before the upload set finishes.
I'm a coding newbie and understand that the control flow of this function isn't the best. However, I can't find any information on the error that the console prints. I can find plenty of information on socket hang ups, but none on the Auth error section.
I'm using a generated service account JSON as a credential to access our database, which uses the firebase-adminsdk account. Our read/write rules for the database are currently open to allow any access (as we're in development with no real users).
Here's my function:
Firebase initialisation & offset zero-ing
// Firebase Admin SDK setup: authenticate with a generated service-account
// key and grab a Firestore handle.
// NOTE(review): "JSON" and "URL" are placeholders for the real
// service-account file path and the project's database URL.
const admin = require('firebase-admin');
var serviceAccount = require("JSON");
admin.initializeApp({
credential: admin.credential.cert(serviceAccount),
databaseURL: "URL"
});
var db = admin.firestore();
// Pagination offset into the remote API; advances in steps of 1000 rows.
var offset = 0;
// Loop control flag: set to true to stop re-running the fetch/upload cycle
// (despite the name, it also marks successful completion).
var failed = false;
Running the function & setting HTTP Headers
var runFunction = function runFunction() {
var https = require('https');
var options = {
host: 'website.com',
path: (path including an offset and 1000 row specifier),
method: 'GET',
json: true,
headers: {
'content-type': 'application/json',
'Authorization': 'Basic ' + new Buffer('username' + ':' + 'password').toString('base64')
}
};
Running the HTTP Request & Re-running the function if we haven't reached the end of the response from the API
if (failed === false) {
var req = https.request(options, function (res) {
var body = '';
res.setEncoding('utf8');
res.on('data', function (chunk) {
body += chunk;
});
res.on('end', () => {
console.log('Successfully processed HTTPS response');
body = JSON.parse(body);
if (body.hasOwnProperty('errors')) {
console.log('Body ->' + body)
console.log('API Call failed due to server error')
console.log('Function failed at ' + offset)
req.end();
return
} else {
if (body.hasOwnProperty('result')) {
let result = body.result;
if (Object.keys(result).length === 0) {
console.log('Function has completed');
failed = true;
return;
} else {
result.forEach(function (item) {
var docRef = db.collection('collection').doc(name);
console.log(name);
var upload = docRef.set({
thing: data,
thing2: data,
})
});
console.log('Finished offset ' + offset)
offset = offset + 1000;
failed = false;
}
if (failed === false) {
console.log('Function will repeat with new offset');
console.log('offset = ' + offset);
req.end();
runFunction();
} else {
console.log('Function will terminate');
}
}
}
});
});
req.on('error', (err) => {
console.log('Error -> ' + err)
console.log('Function failed at ' + offset)
console.log('Repeat from the given offset value or diagnose further')
req.end();
});
req.end();
} else {
req.end();
}
};
runFunction();
Any help would be greatly appreciated!
UPDATE
I've just tried changing the rows of JSON that I pull at a time and subsequently upload at a time using the function - from 1000 down to 100. The socket hang up errors are less frequent so it is definitely due to overloading the database.
Ideally it would be perfect if each forEach array iteration waited for the previous iteration to complete before commencing.
UPDATE #2
I've installed the async module and I'm currently using the async.eachSeries function to perform one document upload at a time. All errors mid-upload disappear - however the function will take an insane amount of time to finish (roughly 9 hours for 158,000 documents). My updated loop code is this, with a counter implemented:
// Sequential upload via the `async` module: writes exactly one document
// at a time by invoking `callback` only after each set() resolves.
// This removed the socket errors but is very slow (~9 h for 158k docs),
// because every write waits for a full network round-trip.
async.eachSeries(result, function (item, callback) {
// result.forEach(function (item) {
// NOTE(review): `name` and `data` are placeholders derived from `item`
// in the real code — confirm against the actual schema.
var docRef = db.collection('collection').doc(name);
console.log(name);
var upload = docRef.set({
thing: data,
thing2: data,
// merge:true updates existing documents instead of overwriting them,
// so re-running from a failed offset is safe.
}, { merge: true }).then(ref => {
counter = counter + 1
// Once every item in this page has been written, advance the offset
// and fetch the next page of 1000 rows.
if (counter == result.length) {
console.log('Finished offset ' + offset)
offset = offset + 1000;
console.log('Function will repeat with new offset')
console.log('offset = ' + offset);
failed = false;
counter = 0
req.end();
runFunction();
}
// Tell async.eachSeries this item is done, releasing the next one.
callback()
});
});
Also, after a period of time the database returns this error:
(node:16168) UnhandledPromiseRejectionWarning: Unhandled promise rejection (rejection id: -Number-): Error: The datastore operation timed out, or the data was temporarily unavailable.
It seems as if now my function is taking too long... instead of not long enough. Does anyone have any advice on how to make this run faster without stated errors?
The write requests as part of this loop were simply exceeding Firestore's quota - thus the server was rejecting the majority of them.
To solve this issue I converted my requests to upload in chunks of 50 or so items at a time, with Promises confirming when to move onto the next chunk upload.
The answer was posted here -> Iterate through an array in blocks of 50 items at a time in node.js, and the template for my working code is as below:
/**
 * Uploads `dataArray` to the backend in sequential chunks of 50 items.
 * Each chunk is fully awaited before the next one begins, which keeps
 * the number of in-flight writes below the server's quota.
 * Errors from any chunk are logged and stop the remaining uploads.
 * @param {Array} dataArray - flat list of items to upload
 */
async function uploadData(dataArray) {
  try {
    let chunkNumber = 0;
    for (const chunk of chunkArray(dataArray, 50)) {
      chunkNumber += 1;
      console.log(` --- Uploading ${chunkNumber} chunk started ---`);
      await uploadDataChunk(chunk);
      console.log(`---Uploading ${chunkNumber} chunk finished ---`);
    }
  } catch (error) {
    console.log(error);
  }
}
/**
 * Stand-in for the real per-item upload: each item "completes" after a
 * random delay under 500 ms, and the whole chunk is awaited in parallel.
 * Replace the setTimeout body (before resolve()) with the real write.
 * @param {Array} chunk - items to upload concurrently
 * @returns {Promise<Array>} resolves once every item in the chunk is done
 */
function uploadDataChunk(chunk) {
  const pending = chunk.map(function (item) {
    return new Promise(function (resolve) {
      const delay = Math.floor(Math.random() * 500);
      setTimeout(function () {
        console.log(`Chunk item ${item} uploaded`);
        resolve();
      }, delay);
    });
  });
  return Promise.all(pending);
}
/**
 * Splits `array` into consecutive slices of at most `chunkSize` items.
 * The source array is not mutated; the final slice may be shorter.
 * @param {Array} array - source array
 * @param {number} chunkSize - maximum length of each slice
 * @returns {Array<Array>} the list of slices (empty for an empty input)
 */
function chunkArray(array, chunkSize) {
  const chunks = [];
  for (let start = 0; start < array.length; start += chunkSize) {
    chunks.push(array.slice(start, start + chunkSize));
  }
  return chunks;
}
Pass the data array through to uploadData - using uploadData(data); and post your upload code for each item into uploadDataChunk inside the setTimeout block (before the resolve() line) within the chunk.map function.
If you found this helpful, you can donate to us via PayPal or buy me a coffee so we can maintain and grow. Thank you!