Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Parsing Nested JSON in Haskell with Aeson

I'm trying to parse JSON from a RESTful API. The returned JSON is highly nested and may/may not include certain fields. Here is an example of some returned data:

{
    resultSet : {
        location : [{
                desc : "Tuality Hospital/SE 8th Ave MAX Station",
                locid : 9843,
                dir : "Eastbound",
                lng : -122.978016886765,
                lat : 45.5212880911494
            }
        ],
        arrival : [{
                detour : false,
                status : "estimated",
                locid : 9843,
                block : 9024,
                scheduled : "2014-03-02T16:48:15.000-0800",
                shortSign : "Blue to Gresham",
                dir : 0,
                estimated : "2014-03-02T16:48:15.000-0800",
                route : 100,
                departed : false,
                blockPosition : {
                    at : "2014-03-02T16:16:43.579-0800",
                    feet : 3821,
                    lng : -122.9909514,
                    trip : [{
                            progress : 171494,
                            desc : "Hatfield Government Center",
                            pattern : 140,
                            dir : 1,
                            route : 100,
                            tripNum : "4365647",
                            destDist : 171739
                        }, {
                            progress : 0,
                            desc : "Cleveland Ave",
                            pattern : 10,
                            dir : 0,
                            route : 100,
                            tripNum : "4365248",
                            destDist : 3577
                        }
                    ],
                    lat : 45.5215368,
                    heading : 328
                },
                fullSign : "MAX Blue Line to Gresham",
                piece : "1"
            }, {
                detour : false,
                status : "estimated",
                locid : 9843,
                block : 9003,
                scheduled : "2014-03-02T17:05:45.000-0800",
                shortSign : "Blue to Gresham",
                dir : 0,
                estimated : "2014-03-02T17:05:45.000-0800",
                route : 100,
                departed : false,
                blockPosition : {
                    at : "2014-03-02T16:34:33.787-0800",
                    feet : 3794,
                    lng : -122.9909918,
                    trip : [{
                            progress : 171521,
                            desc : "Hatfield Government Center",
                            pattern : 140,
                            dir : 1,
                            route : 100,
                            tripNum : "4365648",
                            destDist : 171739
                        }, {
                            progress : 0,
                            desc : "Cleveland Ave",
                            pattern : 10,
                            dir : 0,
                            route : 100,
                            tripNum : "4365250",
                            destDist : 3577
                        }
                    ],
                    lat : 45.5216054,
                    heading : 345
                },
                fullSign : "MAX Blue Line to Gresham",
                piece : "1"
            }
        ],
        queryTime : "2014-03-02T16:35:21.039-0800"
    }
}

As you can see, the JSON schema starts with a resultSet which contains a location, arrival, and queryTime. The location in turn, contains a list of locations, arrival contains a list of arrivals, and queryTime is just a UTC time. Then, an arrival can contain a blockPosition, which can contain a trip, etc. Lots of nesting. Lots of optional fields.

To hold all this, I've created a set of new data types. The data types are nested similarly. For each data type, I have an instance of FromJSON (from the Aeson library).

-- Data Type Definitions and FromJSON Instance Definitions ---------------------


data ResultSet
     = ResultSet     { locations    :: LocationList
                      ,arrivals     :: ArrivalList
                      ,queryTime    :: String
                     } deriving Show

instance FromJSON ResultSet where
  parseJSON (Object o) =
    ResultSet <$> ((o .: "resultSet") >>= (.: "location"))
              <*> ((o .: "resultSet") >>= (.: "arrival"))
              <*> ((o .: "resultSet") >>= (.: "queryTime"))
  parseJSON _ = mzero

data TripList        = TripList     {triplist     :: [Trip]}     deriving Show

instance FromJSON TripList where
  parseJSON (Object o) =
    TripList <$> (o .: "trip")
  parseJSON _ = mzero

data LocationList    = LocationList {locationList :: [Location]} deriving Show

instance FromJSON LocationList where
  parseJSON (Object o) =
    LocationList <$> (o .: "location")
  parseJSON _ = mzero

data Location
     = Location      { loc_desc           :: String
                      ,loc_locid          :: Int
                      ,loc_dir            :: String
                      ,loc_lng            :: Double
                      ,loc_lat            :: Double
                     } deriving Show

instance FromJSON Location where
  parseJSON (Object o) =
    Location <$> (o .: "desc")
              <*> (o .: "locid")
              <*> (o .: "dir")
              <*> (o .: "lng")
              <*> (o .: "lat")
  parseJSON _ = mzero

data ArrivalList     = ArrivalList  {arrivalList  :: [Arrival]}  deriving Show

instance FromJSON ArrivalList where
  parseJSON (Object o) =
    ArrivalList <$>  (o .: "arrival")
  parseJSON _ = mzero

data Arrival
     = Arrival       { arr_detour         :: Bool
                      ,arr_status         :: String
                      ,arr_locid          :: Int
                      ,arr_block          :: Int
                      ,arr_scheduled      :: String
                      ,arr_shortSign      :: String
                      ,arr_dir            :: Int
                      ,estimated      :: Maybe String
                      ,route          :: Int
                      ,departed       :: Bool
                      ,blockPosition  :: Maybe BlockPosition
                      ,fullSign       :: String
                      ,piece          :: String
                     } deriving Show

instance FromJSON Arrival where
  parseJSON (Object o) =
    Arrival <$> (o .: "detour")
            <*> (o .: "status")
            <*> (o .: "locid")
            <*> (o .: "block")
            <*> (o .: "scheduled")
            <*> (o .: "shortSign")
            <*> (o .: "dir")
            <*> (o .:? "estimated")
            <*> (o .: "route")
            <*> (o .: "departed")
            <*> (o .:? "blockPosition")
            <*> (o .: "fullSign")
            <*> (o .: "piece")
  parseJSON _ = mzero

data BlockPosition  
     = BlockPosition { bp_at                 :: String
                      ,bp_feet               :: Int
                      ,bp_lng                :: Double
                      ,bp_trip               :: Trip
                      ,bp_lat                :: Double
                      ,bp_heading            :: Int 
                      } deriving Show

instance FromJSON BlockPosition where
  parseJSON (Object o) =
    BlockPosition <$> (o .: "at")
              <*> (o .: "feet")
              <*> (o .: "lng")
              <*> (o .: "trip")
              <*> (o .: "lat")
              <*> (o .: "heading")
  parseJSON _ = mzero

data Trip           
     = Trip          { trip_progress      :: Int
                      ,trip_desc          :: String
                      ,trip_pattern       :: Int
                      ,trip_dir           :: Int
                      ,trip_route         :: Int
                      ,trip_tripNum       :: Int
                      ,trip_destDist      :: Int
                     } deriving Show

instance FromJSON Trip where
  parseJSON (Object o) =
    Trip <$> (o .: "progress")
         <*> (o .: "desc")
         <*> (o .: "pattern")
         <*> (o .: "dir")
         <*> (o .: "route")
         <*> (o .: "tripNum")
         <*> (o .: "destDist")
  parseJSON _ = mzero

Now, the problem: Retrieving the data is easy. I can show the raw JSON by

json <- getJSON stopID
putStrLn (show (decode json :: (Maybe Value)))

But when I try to get the ResultSet data, it fails with Nothing.

putStrLn (show (decode json :: Maybe ResultSet))

However, if I remove the nested data and simply try to get the queryString field (by removing the fields from the data type and instance of FromJSON, it succeeds and returns the queryString field.

data ResultSet
     = ResultSet     { 
                      queryTime    :: String
                     } deriving Show

instance FromJSON ResultSet where
  parseJSON (Object o)
   = ResultSet <$> ((o .: "resultSet") >>= (.: "queryTime"))
  parseJSON _ = mzero

What am I doing wrong? Is this the easiest method of parsing JSON in Haskell? I'm a total noob at this (a student), so please be gentle.

like image 681
Blake Clough Avatar asked Mar 03 '14 00:03

Blake Clough


1 Answers

I solved my problem. I was trying to create data types for my lists of JSON objects returned. For example, for location data, which is returned as a list of locations:

resultSet : {
  location : [{
      desc : "Tuality Hospital/SE 8th Ave MAX Station",
      locid : 9843,
      dir : "Eastbound",
      lng : -122.978016886765,
      lat : 45.5212880911494
    }
  ],

I was setting up an Arrivals data type containing a list of [Arrival]:

data ArrivalList     = ArrivalList  {arrivalList  :: [Arrival]}  deriving Show

Then, when I tried to parse the JSON, I was trying to stuff an ArrivalList into my ResultSet, which is later used to parse the JSON data inside it. But since ArrivalList is not a JSON object, it was failing.

The fix is to NOT use custom data types for the lists. Instead, assign a list to a JSON !Array object, which can later be parsed into its own objects and sub-objects.

 data ResultSet
      = ResultSet     {
                        locations    :: !Array
                       ,arrivals     :: !Array
                       ,queryTime    :: String
                      } deriving Show

Putting it all together:

data ResultSet
    = ResultSet     {
                      locations    :: !Array
                      ,arrivals     :: !Array
                      ,queryTime    :: String
                    } deriving Show

instance FromJSON ResultSet where
  parseJSON (Object o) = ResultSet <$>
                        ((o .: "resultSet") >>= (.: "location"))
                    <*> ((o .: "resultSet") >>= (.: "arrival"))
                    <*> ((o .: "resultSet") >>= (.: "queryTime"))
  parseJSON _ = mzero

data Location
    = Location      { loc_desc           :: String
                      ,loc_locid          :: Int
                      ,loc_dir            :: String
                      ,loc_lng            :: Double
                      ,loc_lat            :: Double
                    } deriving Show

instance FromJSON Location where
  parseJSON (Object o) =
    Location <$> (o .: "desc")
              <*> (o .: "locid")
              <*> (o .: "dir")
              <*> (o .: "lng")
              <*> (o .: "lat")
  parseJSON _ = mzero
like image 172
Blake Clough Avatar answered Nov 09 '22 22:11

Blake Clough