Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How can I use these Node modules to accept HTML through a file or URL and then output JSON as validation of existing HTML elements?

Essentially what I need to do is to take a local grader.js file and then use it at the command line to input HTML, which will then output JSON data to the console to validate the existence of several HTML elements. The usage looks something like this:

./grader.js --checks checks.json --file index.html
./grader.js --checks checks.json --url http://google.com

The Node modules being used are Commander (for working at the command line), Cheerio (for HTML), and Restler (for getting HTML from URL).

The checks.json file is straightforward in that it's simply asking to check for the existence of a few simple HTML elements to find out whether or not they exist on the page:

["h1",
 ".navigation",
 ".logo",
 ".blank",
 ".about",
 ".heading",
 ".subheading",
 ".pitch",
 ".video",
 ".thermometer",
 ".order",
 ".social",
 ".section1",
 ".section2",
 ".faq",
 ".footer"]

The grader.js file is where things get a little more complicated. The following code actually works insofar as it takes the command line arguments and does indicate a true or false value as to whether the HTML elements exist. But it doesn't work properly after adding the URL check at the bottom. There is something wrong with my checkURL function and the way that I implement it using the Commander code at the bottom. Even though the true and false values are correct dependent upon the HTML file/URL I use, I end up spitting out both checks to the console even if I only want to check either the file or the URL, not both. I'm fairly new to this so I'm surprised that it works at all. It may have something to do with the default values, but when I try to make those changes the checkURL function seems to break down. Thanks in advance for your help I really do appreciate it.

#!/usr/bin/env node
var fs = require('fs');
var program = require('commander');
var cheerio = require('cheerio');
var rest = require('restler');
var HTMLFILE_DEFAULT = "index.html";
var CHECKSFILE_DEFAULT = "checks.json";
var URL_DEFAULT = "http://cryptic-spire-7925.herokuapp.com/index.html";

var assertFileExists = function(infile) {
    var instr = infile.toString();
    if(!fs.existsSync(instr)) {
        console.log("%s does not exist. Exiting.", instr);
        process.exit(1); // http://nodejs.org/api/process.html#process_process_exit_code
    }
    return instr;
};

var cheerioHtmlFile = function(htmlfile) {
    return cheerio.load(fs.readFileSync(htmlfile));
};

var loadChecks = function(checksfile) {
    return JSON.parse(fs.readFileSync(checksfile));
};

var checkHtmlFile = function(htmlfile, checksfile) {
    $ = cheerioHtmlFile(htmlfile);
    var checks = loadChecks(checksfile).sort();
    var out = {};
    for(var ii in checks) {
        var present = $(checks[ii]).length > 0;
        out[checks[ii]] = present;
    }
    return out;
};

var checkUrl = function(url, checksfile) {
    rest.get(url).on('complete', function(data) {
                $ = cheerio.load(data);
            var checks = loadChecks(checksfile).sort();
            var out = {};
            for(var ii in checks) {
                var present = $(checks[ii]).length > 0;
                out[checks[ii]] = present;
            }
            console.log(out);
        });
}

var clone = function(fn) {
    // Workaround for commander.js issue.
    // http://stackoverflow.com/a/6772648
    return fn.bind({});
};

if(require.main == module) {
    program
        .option('-f, --file <html_file>', 'Path to index.html', clone(assertFileExists), HTMLFILE_DEFAULT)
        .option('-u, --url <url>', 'URL to index.html', URL_DEFAULT)
        .option('-c, --checks <check_file>', 'Path to checks.json', clone(assertFileExists), CHECKSFILE_DEFAULT)
        .parse(process.argv);

    var checkJson = checkHtmlFile(program.file, program.checks);
    var outJson = JSON.stringify(checkJson, null, 4);
    console.log(outJson);

    var checkJson2 = checkUrl(program.url, program.checks);
    var outJson2 = JSON.stringify(checkJson2, null, 4);
    console.log(outJson2);

} 
else {
    exports.checkHtmlFile = checkHtmlFile;
}
like image 475
Bijan Avatar asked Jul 10 '13 15:07

Bijan


3 Answers

Depending on the arguments call either one of checkHtmlFile() or checkUrl()

Something like:

if (program.url)
   checkUrl(program.url, program.checks);
else checkHtmlFile(program.file, program.checks);

Read this for more references: commander.js option parsing

Also, checkJson2 is undefined as checkUrl() isn't returning anything.

like image 58
ankitsabharwal Avatar answered Nov 05 '22 21:11

ankitsabharwal


Those commander .option lines look wrong to me.

Delete the clone function and revise your option lines as follows:

    .option('-f, --file <html_file>', 'Path to index.html', HTMLFILE_DEFAULT)
    .option('-u, --url <url>', 'URL to index.html', URL_DEFAULT)
    .option('-c, --checks <check_file>', 'Path to checks.json', CHECKSFILE_DEFAULT)

This should solve your commander problem.

like image 29
David Avatar answered Nov 05 '22 20:11

David


Here is the updated checkUrl function after the helpful hints from @David and @ankitsabharwal.

var checkUrl = function(url, checksfile) {
    rest.get(url).on('complete', function(data) {
        $ = cheerio.load(data);
        var checks = loadChecks(checksfile).sort();
        var out = {};
        for(var ii in checks) {
            var present = $(checks[ii]).length > 0;
            out[checks[ii]] = present;
        }
        var outJson = JSON.stringify(out, null, 4);
        console.log(outJson);
    });
}

And here is the updated Commander code below:

if(require.main == module) {
    program
        .option('-f, --file <html_file>', 'Path to index.html')
        .option('-u, --url <url>', 'URL to index.html')
        .option('-c, --checks <check_file>', 'Path to checks.json')
        .parse(process.argv);

    if (program.url) {
        checkUrl(program.url, program.checks);
    } else {
        checkHtmlFile (program.file, program.checks);

        var checkJson = checkHtmlFile(program.file, program.checks);
        var outJson = JSON.stringify(checkJson, null, 4);
        console.log(outJson);
    }
} 
like image 3
Bijan Avatar answered Nov 05 '22 21:11

Bijan