Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Building array of objects from parsed csv files in node

I have multiple csv files of the form

  • model1A
  • model1B
  • model2A
  • model2B

where each csv is an array i.e. model1A = [1, 1, 1]

I want to parse these csvs and create a single array containing all these models, where each element in the array is an object corresponding to one particular model, i.e.

finalArray = [ 
  { 
    "model" :   "model1",
    "A"     :   [1, 1, 1],
    "B"     :   [2, 2, 2]
  },
  { 
    "model" :   "model2",
    "A"     :   [3, 3, 3],
    "B"     :   [4, 4, 4]
  }
]

The code I have so far is

// Script purpose: read every file in ./data, derive a model name and a
// parameter name from each filename, collect the parsed CSV rows into
// `models`, and write the result to result.json.
var csv = require('csv');
var fs = require('fs');
// NOTE(review): `parser`, `util` and `junk` are not referenced anywhere else in this snippet.
var parser = csv.parse();
var util = require('util');
var junk = require('junk');
var _ = require('lodash');
// Accumulates the objects that are eventually serialised to result.json.
var models = [];


fs.readdir(__dirname+'/data', function(err, files) {
    // NOTE(review): `err` is never checked.
    // A single object, declared outside the loop, so every iteration below
    // mutates and pushes this same reference.
    var model = {};
    _.forEach(files, function(n, key) {

        console.log('Analysing file: ' + n);
        var modelName;
        var modelNum;
        var modelParam;


        // First run of digits in the filename. match() returns null when the
        // name contains no digits, in which case the [0] access throws.
        modelNum = n.match(/\d+/)[0];
        modelName = 'model' + modelNum;
        // Filename up to the first '.', with the model name removed,
        // e.g. 'model1A.csv' -> 'A'.
        modelParam = (n.substring(0, n.indexOf('.'))).replace(modelName,'');

        // Overwrites the `model` property set by any earlier iteration,
        // because `model` is shared.
        model.model = modelName;
        model[modelParam] = [];
        // Pushes the shared object again: `models` ends up holding the same
        // reference once per file.
        models.push(model);

        //if (Object.keys(model).length === 3) {
        //    models.push(model);
        //    model = {};
        //}


        // Stream the file through the CSV parser; the transform callback
        // appends each parsed row to the array for this file's parameter.
        fs.createReadStream(__dirname+'/data/'+n).pipe(csv.parse()).pipe(csv.transform(function(row) {
            model[modelParam].push(row);

        })).on('readable', function(){
            // Drain the transform's output; presumably needed so the stream
            // keeps flowing and eventually emits 'end'.
            while(this.read()){}
        }).on('end', function() {
            console.log('finished reading file ' + n);
            // Triggers the write when the last-listed file ends. The streams
            // run asynchronously, so other files may not have finished yet.
            if (key === (files.length - 1)) {
                fs.writeFile('result.json', JSON.stringify(models), function (err) {
                    if (err) throw err;
                    console.log(models.length + ' model(s) parsed');
                    console.log('done');
                });
            }

        }).on('error', function(error) {
            // Only errors emitted by the final transform stream reach this handler.
            console.log(error);
        });    
    });
});

I know one of my issues is that I am pushing the model to the array too soon, resulting in a final array of the form below, where model1 is being overwritten by model2

[ { model: 'model2', A: [], B: [] },
  { model: 'model2', A: [], B: [] },
  { model: 'model2', A: [], B: [] },
  { model: 'model2', A: [], B: [] } ]

That's why I tried this code

if (Object.keys(model).length === 3) {
  models.push(model);
  model = {};
}

but of course this couldn't work because the fs.createReadStream is async and I am clearing the model with model = {} before it can run properly.

I'm at the stage now where I feel I'm going around in circles and just making things worse. I wanted to create something more generic, however, now I would be delighted to get it working for the case presented here and then I can look at improving it.

Any help would be really appreciated!


Update 1

Following saquib khan's suggestion of moving the var model = {} inside the loop has helped get me closer to my goal, but it's still not right. Below is the current result

[
    {
        "model": "model1",
        "A": [
            [
                "1"
            ],
            [
                "2"
            ],
            [
                "3"
            ],
            [
                "4"
            ]
        ]
    },
    {
        "model": "model1",
        "B": [
            [
                "1"
            ],
            [
                "2"
            ],
            [
                "3"
            ],
            [
                "4"
            ]
        ]
    },
    {
        "model": "model2",
        "A": [
            [
                "1"
            ],
            [
                "2"
            ],
            [
                "3"
            ],
            [
                "4"
            ]
        ]
    },
    {
        "model": "model2",
        "B": [
            [
                "1"
            ],
            [
                "2"
            ],
            [
                "3"
            ],
            [
                "4"
            ]
        ]
    }
]

Update 2

Also following Denys Denysiuk's suggestion, the result is closer to what I want, but still just short

[
    {
        "model": "model1",
        "A": [
            "1",
            "2",
            "3",
            "4"
        ]
    },
    {
        "model": "model1",
        "B": [
            "1",
            "2",
            "3",
            "4"
        ]
    },
    {
        "model": "model2",
        "A": [
            "1",
            "2",
            "3",
            "4"
        ]
    },
    {
        "model": "model2",
        "B": [
            "1",
            "2",
            "3",
            "4"
        ]
    }
]

This would work, if I could just somehow iterate over that final array of objects, merging objects with a matching model name. I'm currently looking through the lodash docs to see if I can figure something out. I will post back here if I do.

like image 970
Philip O'Brien Avatar asked Jun 10 '15 09:06

Philip O'Brien


People also ask

How do I convert a CSV file to an array?

To convert or parse CSV data into an array, you need to use JavaScript's FileReader class, which contains a method called readAsText() that reads a CSV file's data and returns the result as a text string. The FileReader class is a web API, so this solution only works in the browser.

How do I parse a CSV file in node JS?

You will use the fs module's createReadStream() method to read the data from the CSV file and create a readable stream. You will then pipe the stream to another stream initialized with the csv-parse module to parse the chunks of data. Once the chunks of data have been parsed, you can log them in the console.

How do you create an array in node?

Creating an Array Object: option one is to use an array literal: var array = []; Option two is to create an Array object by instantiating the Array object: var array = new Array(); The last option is to create an Array object by inserting collection data.


2 Answers

There is a very small coding error in your code.

var model = {}; should be inside forEach loop.

Try below code:

var csv = require('csv');
var fs = require('fs');
var parser = csv.parse();
var util = require('util');
var junk = require('junk');
var _ = require('lodash');
var models = [];


fs.readdir(__dirname+'/data', function(err, files) {

    _.forEach(files, function(n, key) {

        console.log('Analysing file: ' + n);
        var model = {};
        var modelName;
        var modelNum;
        var modelParam;


        modelNum = n.match(/\d+/)[0];
        modelName = 'model' + modelNum;
        modelParam = (n.substring(0, n.indexOf('.'))).replace(modelName,'');

        model.model = modelName;
        model[modelParam] = [];
        models.push(model);

        //if (Object.keys(model).length === 3) {
        //    models.push(model);
        //    model = {};
        //}


        fs.createReadStream(__dirname+'/data/'+n).pipe(csv.parse()).pipe(csv.transform(function(row) {
            model[modelParam].push(row);

        })).on('readable', function(){
            while(this.read()){}
        }).on('end', function() {
            console.log('finished reading file ' + n);
            if (key === (files.length - 1)) {
                fs.writeFile('result.json', JSON.stringify(models), function (err) {
                    if (err) throw err;
                    console.log(models.length + ' model(s) parsed');
                    console.log('done');
                });
            }

        }).on('error', function(error) {
            console.log(error);
        });    
    });
});
like image 109
saquib khan Avatar answered Sep 20 '22 09:09

saquib khan


Try this out:

/**
 * Parses every model CSV in ./data and merges the files that belong to the
 * same model into a single entry of `models`, e.g.
 *   { model: 'model1', A: [...], B: [...] }
 * then writes the array to result.json once every file has been processed.
 *
 * Relies on `fs`, `csv`, `_` (lodash) and the `models` array declared by the
 * surrounding script. Filenames are expected to look like 'model1A.csv'.
 */
fs.readdir(__dirname+'/data', function(err, files) {
    if (err) throw err;

    // Names without a model number (e.g. .DS_Store) cannot be mapped to a
    // model; match() would return null for them and crash the loop.
    var dataFiles = files.filter(function(n) {
        return /\d+/.test(n);
    });

    // The streams finish in arbitrary order, so count outstanding files
    // instead of assuming the last-listed file is the last one to end.
    var pending = dataFiles.length;

    // Called exactly once per file; writes the result after the final one.
    function fileSettled() {
        pending -= 1;
        if (pending > 0) return;

        fs.writeFile('result.json', JSON.stringify(models), function (err) {
            if (err) throw err;
            console.log(models.length + ' model(s) parsed');
            console.log('done');
        });
    }

    _.forEach(dataFiles, function(n) {

        console.log('Analysing file: ' + n);

        var modelNum = n.match(/\d+/)[0];
        var modelName = 'model' + modelNum;
        var modelParam = (n.substring(0, n.indexOf('.'))).replace(modelName,'');
        var settled = false;

        // Reuse the entry created by an earlier file of the same model so
        // that A, B, ... end up on one object; create it on first sight.
        var model = _.find(models, { model: modelName });
        if (!model) {
            model = { model: modelName };
            models.push(model);
        }

        model[modelParam] = [];

        // Guards against counting a file twice (e.g. 'error' followed by 'end').
        function settle() {
            if (settled) return;
            settled = true;
            fileSettled();
        }

        function onError(error) {
            console.log(error);
            settle();
        }

        // pipe() does not forward errors downstream, so each stage needs its
        // own handler; otherwise a read or parse failure is an uncaught
        // 'error' event and the result would never be written.
        fs.createReadStream(__dirname+'/data/'+n).on('error', onError)
            .pipe(csv.parse()).on('error', onError)
            .pipe(csv.transform(function(row) {
                // Each file holds one value per row; keep the value, not the
                // single-element row array.
                model[modelParam].push(row[0]);
            })).on('readable', function(){
                // Drain the output so the stream keeps flowing to 'end'.
                while(this.read()){}
            }).on('end', function() {
                console.log('finished reading file ' + n);
                settle();
            }).on('error', onError);
    });
});
like image 21
Denys Denysiuk Avatar answered Sep 21 '22 09:09

Denys Denysiuk