We've got a Node.js script that is run once a minute to check the status of our apps. Usually, it works just fine. If the service is up, it exits with 0. If it's down, it exits with 1. All is well.
But every once in a while, it just kinda stops. The console reports "Calling status API..." and stops there indefinitely. It doesn't even timeout at Node's built-in two-minute timeout. No errors, nothing. It just sits there, waiting, forever. This is a problem, because it blocks following status check jobs from running.
At this point, my whole team has looked at it and none of us can figure out what circumstance could make it hang. We've built in a start-to-finish timeout, so that we can move on to the next job, but that essentially skips a status check and creates blind spots. So, I open the question to you fine folks.
Here's the script (with names/urls removed):
#!/usr/bin/env node
// SETTINGS: -------------------------------------------------------------------------------------------------
/** URL to contact for status information. */
const STATUS_API = process.env.STATUS_API;
/** Number of attempts to make before reporting as a failure. */
const ATTEMPT_LIMIT = 3;
/** Amount of time to wait before starting another attempt, in milliseconds. */
const ATTEMPT_DELAY = 5000;
// RUNTIME: --------------------------------------------------------------------------------------------------
const URL = require('url');
const https = require('https');
// Make the first attempt.
make_attempt(1, STATUS_API);
// FUNCTIONS: ------------------------------------------------------------------------------------------------
function make_attempt(attempt_number, url) {
console.log('\n\nCONNECTION ATTEMPT:', attempt_number);
check_status(url, function (success) {
console.log('\nAttempt', success ? 'PASSED' : 'FAILED');
// If this attempt succeeded, report success.
if (success) {
console.log('\nSTATUS CHECK PASSED after', attempt_number, 'attempt(s).');
process.exit(0);
}
// Otherwise, if we have additional attempts, try again.
else if (attempt_number < ATTEMPT_LIMIT) {
setTimeout(make_attempt.bind(null, attempt_number + 1, url), ATTEMPT_DELAY);
}
// Otherwise, we're out of attempts. Report failure.
else {
console.log("\nSTATUS CHECK FAILED");
process.exit(1);
}
})
}
function check_status(url, callback) {
var handle_error = function (error) {
console.log("\tFailed.\n");
console.log('\t' + error.toString().replace(/\n\r?/g, '\n\t'));
callback(false);
};
console.log("\tCalling status API...");
try {
var options = URL.parse(url);
options.timeout = 20000;
https.get(options, function (response) {
var body = '';
response.setEncoding('utf8');
response.on('data', function (data) {body += data;});
response.on('end', function () {
console.log("\tConnected.\n");
try {
var parsed = JSON.parse(body);
if ((!parsed.started || !parsed.uptime)) {
console.log('\tReceived unexpected JSON response:');
console.log('\t\t' + JSON.stringify(parsed, null, 1).replace(/\n\r?/g, '\n\t\t'));
callback(false);
}
else {
console.log('\tReceived status details from API:');
console.log('\t\tServer started:', parsed.started);
console.log('\t\tServer uptime:', parsed.uptime);
callback(true);
}
}
catch (error) {
console.log('\tReceived unexpected non-JSON response:');
console.log('\t\t' + body.trim().replace(/\n\r?/g, '\n\t\t'));
callback(false);
}
});
}).on('error', handle_error);
}
catch (error) {
handle_error(error);
}
}
If any of you can see any places where this could possibly hang without output or timeout, that'd be very helpful!
Thank you, James Tanner
EDIT: p.s. We use https
directly, instead of request
so that we don't need to do any installation when the script runs. This is because the script can run on any build machine assigned to Jenkins without a custom installation.
Aren't you missing the .end()
?
http.request(options, callback).end()
Something like explained here.
Inside your response callback your not checking the status..
The .on('error', handle_error);
is for errors that occur connecting to the server, status code errors are those that the server responds with after a successful connection.
Normally a 200 status response is what you would expect from a successful request..
So a small mod to your http.get to handle this should do..
eg.
https.get(options, function (response) {
if (response.statusCode != 200) {
console.log('\tHTTP statusCode not 200:');
callback(false);
return; //no point going any further
}
....
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With