Convert JSON file to a CSV file using R

Tags:

I have the JSON file in a .txt file which I'm trying to load into R but I'm getting the following error:

Error in feed_push_parser(readBin(con, raw(), n), reset = TRUE) : 
  parse error: trailing garbage
      " : "SUCCESS"  }    /* 1 */  {    "_id" : "b736c374-b8ae-4e9
                 (right here) ------^

I'm assuming the error is because of multiple instances of /* (number) */ and I can't manually remove them all as my file has 10k instances of these. Is there a way to remove such instances before loading the data into R?

My JSON file looks like below:

/* 0 */
{
  "_id" : "93ccbdb6-8947",
  "uiSearchRequest" : {
    "travelDate" : 20151206,
    "travelDuration" : 7,
    "shopperDuration" : 30,
    "oneWay" : false,
    "userId" : "ATP1KKP",
    "queryId" : "93ccbdb6-8947",
    "subRequests" : [{
        "origin" : "WAS",
        "destination" : "LON",
        "carrier" : "AA",
        "fareClasses" : "",
        "owrt" : "1,2"
      }]
  },
  "downloadCount" : 0,
  "requestDate" : 20151205,
  "totalRecords" : 0,
  "status" : "SUCCESS"
}

/* 1 */
{
  "_id" : "b736c374-b8ae",
  "uiSearchRequest" : {
    "travelDate" : 20151206,
    "travelDuration" : 7,
    "shopperDuration" : 30,
    "oneWay" : false,
    "userId" : "ATP1KKP",
    "queryId" : "b736c374-b8ae",
    "subRequests" : [{
        "origin" : "WAS",
        "destination" : "LON",
        "carrier" : "AA",
        "fareClasses" : "",
        "owrt" : "1,2"
      }]
  },
  "downloadCount" : 0,
  "requestDate" : 20151205,
  "totalRecords" : 0,
  "status" : "SUCCESS"
}

/* 2 */
{
  "_id" : "3312605f-8304",
  "uiSearchRequest" : {
    "travelDate" : 20151206,
    "travelDuration" : 7,
    "shopperDuration" : 30,
    "oneWay" : false,
    "userId" : "ATP1SXE",
    "queryId" : "3312605f-8304",
    "subRequests" : [{
        "origin" : "LON",
        "destination" : "IAD",
        "carrier" : "AA",
        "fareClasses" : "",
        "owrt" : "1,2"
      }]
  },
  "downloadCount" : 2,
  "requestDate" : 20151205,
  "totalRecords" : 0,
  "status" : "SUCCESS"
}

/* 3 */
{
  "_id" : "6b668cfa-9b79",
  "uiSearchRequest" : {
    "travelDate" : 20151206,
    "travelDuration" : 7,
    "shopperDuration" : 30,
    "oneWay" : false,
    "userId" : "ATP1NXA",
    "queryId" : "6b668cfa-9b79",
    "subRequests" : [{
        "origin" : "WAS",
        "destination" : "LON",
        "carrier" : "AA",
        "fareClasses" : "",
        "owrt" : "1,2"
      }]
  },
  "downloadCount" : 1,
  "requestDate" : 20151205,
  "totalRecords" : 1388,
  "status" : "SUCCESS"
}

/* 4 */
{
  "_id" : "41c373a1-e4cb",
  "uiSearchRequest" : {
    "travelDate" : 20151206,
    "travelDuration" : 7,
    "shopperDuration" : 30,
    "oneWay" : false,
    "userId" : "ATP6CXS",
    "queryId" : "41c373a1-e4cb",
    "subRequests" : [{
        "origin" : "WAS",
        "destination" : "LON",
        "carrier" : "AA",
        "fareClasses" : "",
        "owrt" : "1,2"
      }]
  },
  "downloadCount" : 0,
  "requestDate" : 20151205,
  "totalRecords" : 1388,
  "status" : "SUCCESS"
}

/* 5 */
{
  "_id" : "2c8331c4-21ca",
  "uiSearchRequest" : {
    "travelDate" : 20151206,
    "travelDuration" : 7,
    "shopperDuration" : 30,
    "oneWay" : false,
    "userId" : "ATP1KKP",
    "queryId" : "2c8331c4-21ca",
    "subRequests" : [{
        "origin" : "WAS",
        "destination" : "LON",
        "carrier" : "AA",
        "fareClasses" : "",
        "owrt" : "1,2"
      }]
  },
  "downloadCount" : 0,
  "requestDate" : 20151205,
  "totalRecords" : 1388,
  "status" : "SUCCESS"
}

/* 6 */
{
  "_id" : "71a09900-1c13",
  "uiSearchRequest" : {
    "travelDate" : 20151206,
    "travelDuration" : 7,
    "shopperDuration" : 30,
    "oneWay" : false,
    "userId" : "ATP6CXS",
    "queryId" : "71a09900-1c13",
    "subRequests" : [{
        "origin" : "WAS",
        "destination" : "LON",
        "carrier" : "AF",
        "fareClasses" : "",
        "owrt" : "1,2"
      }, {
        "origin" : "WAS",
        "destination" : "LON",
        "carrier" : "AA",
        "fareClasses" : "",
        "owrt" : "1,2"
      }, {
        "origin" : "WAS",
        "destination" : "LON",
        "carrier" : "DL",
        "fareClasses" : "",
        "owrt" : "1,2"
      }, {
        "origin" : "WAS",
        "destination" : "LON",
        "carrier" : "LH",
        "fareClasses" : "",
        "owrt" : "1,2"
      }, {
        "origin" : "WAS",
        "destination" : "LON",
        "carrier" : "BA",
        "fareClasses" : "",
        "owrt" : "1,2"
      }]
  },
  "downloadCount" : 0,
  "requestDate" : 20151205,
  "totalRecords" : 6941,
  "status" : "SUCCESS"
}

/* 7 */
{
  "_id" : "a036a42a-918b",
  "uiSearchRequest" : {
    "travelDate" : 20151206,
    "travelDuration" : 7,
    "shopperDuration" : 30,
    "oneWay" : false,
    "userId" : "ATP1MMM",
    "queryId" : "a036a42a-918b",
    "subRequests" : [{
        "origin" : "WAS",
        "destination" : "LON",
        "carrier" : "AA",
        "fareClasses" : "",
        "owrt" : "1,2"
      }]
  },
  "downloadCount" : 0,
  "requestDate" : 20151205,
  "totalRecords" : 1388,
  "status" : "SUCCESS"
}

/* 8 */
{
  "_id" : "c547be36-805c",
  "uiSearchRequest" : {
    "travelDate" : 20151206,
    "travelDuration" : 7,
    "shopperDuration" : 30,
    "oneWay" : false,
    "userId" : "ATP1SXB",
    "queryId" : "c547be36-805c",
    "subRequests" : [{
        "origin" : "CHI",
        "destination" : "LON",
        "carrier" : "BA",
        "fareClasses" : "",
        "owrt" : "1,2"
      }]
  },
  "downloadCount" : 2,
  "requestDate" : 20151205,
  "totalRecords" : 1072,
  "status" : "SUCCESS"
}

My code is below (Although I haven't gotten much far):

library(jsonlite)
library(RJSONIO)
json_data_raw<-fromJSON("mydata.txt")

json_file <- lapply(json_data_raw, function(x) {
  x[sapply(x, is.null)] <- NA
  unlist(x)
})

output <-- do.call("rbind", json_file)
write.csv(a, file="json.csv",row.names = FALSE)
file.show("json.csv")

I'm trying to get my output into a CSV file like below

I'm trying to get output into a CSV file like below

381

asked Jun 23 '16 16:06

Lester Pereira

1 Answers

There are several issues with your text file. As you already noticed, you need to remove the lines of the form /* 0 */. What results is still not valid json. If you want to have several json objects in a file, you need to store them in an array. The json objects are the parts that are closed in curly brakets, e.g.,

{
  "_id" : "93ccbdb6-8947-4687-8e12-edf4e40d6650",
  ...
  "totalRecords" : 0,
  "status" : "SUCCESS"
}

The structure of an array of objects is as follows:

[
  {
    ...
  },
  {
    ...
  }
]

To get your file in shape, you need to add a comma between the objects and add the square brackets. You could do this as follows:

raw <- readLines("mydata.txt")

# get rid of the "/* 0 */" lines
json <- grep("^/\\* [0-9]* \\*/", raw, value = TRUE, invert = TRUE)

# add missing comma after }
n <- length(json)
json[-n] <- gsub("^}$", "},", json[-n])

# add brakets at the beginning and end
json <- c("[", json, "]")

This can be read by fromJSON(), so I assume it is valid json:

library(jsonlite)
table <- fromJSON(json)

The table is nested, that is, some of the tables cells contain a data frame or a list themselves. For example,

table[1,2]
##   travelDate travelDuration shopperDuration oneWay  userId                              queryId
## 1   20151206              7              30  FALSE ATP1KKP 93ccbdb6-8947-4687-8e12-edf4e40d6650
##               subRequests
##     1 WAS, LON, AA, , 1,2

You could use flatten() from the jsonlite package, to get a table with one level of nesting less

flatten(table)[1:3, c(1, 6, 12)]
##                                    _id uiSearchRequest.travelDate uiSearchRequest.subRequests
## 1 93ccbdb6-8947-4687-8e12-edf4e40d6650                   20151206         WAS, LON, AA, , 1,2
## 2 b736c374-b8ae-4e99-8073-9c54517fecd5                   20151206         WAS, LON, AA, , 1,2
## 3 3312605f-8304-4ab8-96d6-6e1a03cfbd9e                   20151206         LON, IAD, AA, , 1,2

The last column is still a list. There are many ways you could handle this. One possibility, is to create a row per subrequest, where the contents of all the other columns (X_id, downloadCount, etc.) are repeated. (This is almost the form that you give in your question, with the only difference that you left cells empty in the reapeated columns, while I repeat the contents.) This is how it can be done:

table <- flatten(fromJSON(json))
tab_list <- lapply(1:nrow(table),
                  function(i) data.frame(table[i, -12], table[i, 12],
                              stringsAsFactors = FALSE))
library(dplyr)
flat_table <- bind_rows(tab_list)

The second line creates a list of data frames. These are combined into a single data frame using bind_rows() from dpylr. (To be more precise, flat_table will be a tbl_df, but the difference to a data.frame is small.) This can then be written to a csv file in the usual way:

write.csv(flat_table, file = "mydata.csv")

answered Oct 07 '22 19:10

Stibu

Related questions
                            
                                POST JSON object to aws lambda
                            
                                Python: Change list type for json decoding
                            
                                How does Google Chrome's Advanced REST client make cross domain POST requests?
                            
                                json_decode() returns blank but it's valid json
                            
                                0x800a1391 - JavaScript runtime error: 'JSON' is undefined in IE 10
                            
                                File or Database? - Best Practice to save Objects on Android-Device
                            
                                Build JSON Hierarchy from Structured Data
                            
                                Convert json value to int in c++
                            
                                JavaScript save blob to localStorage
                            
                                inserting javascript variable value in json
                            
                                Gson fromJson deserialize generics
                            
                                What is the Correct way to define key-value pairs in json schema
                            
                                How to add Label to each state in d3.js (albersUsa)?
                            
                                Poco C++ building nested JSON objects
                            
                                Python: forcing precision on a floating point number in json?
                            
                                How can I process huge JSON files as streams in Ruby, without consuming all memory?
                            
                                How to insert multiple value onkey change action?
                            
                                GSON deserialization with generic types and generic field names
                            
                                psql insert json with double quotes inside strings
                            
                                iteration a json object on Ngfor in angular 2

Donate For Us

If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!

Donate Us With

Convert JSON file to a CSV file using R

Tags:

json

r

csv

Lester Pereira

People also ask

1 Answers

Stibu

Recent Activity

Donate For Us