Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

extracting data from nested list in R

I have downloaded a list of addresses with the google_reverse_geocode API for a list of places that have latitude and longitude information. Since I'm very new to R, I don't know how to extract the useful information. All the code for downloading the data is at the bottom of the question.

The structure of the list in general is this.

`$ 60  :List of 1
..$ results:'data.frame':   1 obs. of  5 variables:
.. ..$ address_components:List of 1
.. .. ..$ :'data.frame':    8 obs. of  3 variables:
.. .. .. ..$ long_name : chr [1:8] "119" "Avenida Diego Díaz de Berlanga" 
"Jardines de Anahuac 2do Sector" "San Nicolás de los Garza" ...
.. .. .. ..$ short_name: chr [1:8] "119" "Avenida Diego Díaz de Berlanga" 
"Jardines de Anahuac 2do Sector" "San Nicolás de los Garza" ...
.. .. .. ..$ types     :List of 8
.. .. .. .. ..$ : chr "street_number"
.. .. .. .. ..$ : chr "route"
.. .. .. .. ..$ : chr [1:3] "political" "sublocality" "sublocality_level_1"
.. .. .. .. ..$ : chr [1:2] "locality" "political"
.. .. .. .. ..$ : chr [1:2] "administrative_area_level_2" "political"
.. .. .. .. ..$ : chr [1:2] "administrative_area_level_1" "political"
.. .. .. .. ..$ : chr [1:2] "country" "political"
.. .. .. .. ..$ : chr "postal_code"
.. ..$ formatted_address : chr "Avenida Diego Díaz de Berlanga 119, Jardines 
de Anahuac 2do Sector, 66444 San Nicolás de los Garza, N.L., Mexico"
.. ..$ geometry          :'data.frame': 1 obs. of  3 variables:
.. .. ..$ location     :'data.frame':   1 obs. of  2 variables:
.. .. .. ..$ lat: num 25.7
.. .. .. ..$ lng: num -100
.. .. ..$ location_type: chr "ROOFTOP"
.. .. ..$ viewport     :'data.frame':   1 obs. of  2 variables:
.. .. .. ..$ northeast:'data.frame':    1 obs. of  2 variables:
.. .. .. .. ..$ lat: num 25.7
.. .. .. .. ..$ lng: num -100
.. .. .. ..$ southwest:'data.frame':    1 obs. of  2 variables:
.. .. .. .. ..$ lat: num 25.7
.. .. .. .. ..$ lng: num -100
.. ..$ place_id          : chr "ChIJRY_wPdqUYoYRTJetT6AJETA"
.. ..$ types             :List of 1
.. .. ..$ : chr "street_address"

I need the information as a data frame to perform my analysis. Specifically, I need c(latitude, longitude, formatted_address, place_id).

The code that I have written is this:

  # Attempt: flatten the first element of every entry in direccion1 into one
  # atomic vector per entry.
  prueba <- sapply(direccion1, function(x){
# unlist() flattens all nested fields of x[[1]]; the value of this
# assignment is what the anonymous function returns.
uno <- unlist(x[[1]])
})

# data.frame() needs all of its columns to have the same length, so this call
# fails when the flattened vectors differ in length (see the error quoted
# after this snippet).
pureba2 <- data.frame(prueba)

I get the following error: Error in (function (..., row.names = NULL, check.rows = FALSE, check.names = TRUE, : arguments imply differing number of rows: 40, 32, 37, 44, 36, 0, 41, 28, 39, 47, 43, 35, 48. I have tried other approaches as well, but none of them worked.

The code for downloading the data that contains the longitude and latitude is the following.

 # CRE FILES
 library(easypackages)
 my_packages <- c("ggmap","maps","mapdata","rlist","readr", "tidyverse", 
 "lubridate", "stringr", "rebus", "stringi", "purrr", "geosphere", "XML", 
 "RCurl", "xml2")
 libraries(my_packages)

 # Address of the XML document with the prices
 link1 <- "https://publicacionexterna.azurewebsites.net/publicaciones/prices"

 # Download the document as text and parse it
 data_prices <- getURL(link1)
 xmlfile <- xmlParse(data_prices)

 # All <place> nodes in the document
 places <- getNodeSet(xmlfile, "//place")

 # Build one character matrix per place: one row for each child node, made of
 # the attributes of the place, the attributes of the child and the child's
 # text stored under the name "price"
 values <- lapply(places, function(place) {
   p_id <- xmlAttrs(place)
   filas <- lapply(xmlChildren(place), function(child) {
     c(p_id, xmlAttrs(child), price = xmlValue(child))
   })
   do.call(rbind, filas)
 })

 # Stack the per-place matrices into a single data frame of character columns
 datosDePrecios <- as.data.frame(
   do.call(rbind, values),
   stringsAsFactors = FALSE
 )

 # Reset the row names to 1..n. seq_len() is used instead of 1:nrow() so that
 # an empty result gives an empty sequence rather than c(1, 0).
 row.names(datosDePrecios) <- seq_len(nrow(datosDePrecios))

   # Address of the XML document with the places
   link2 <- "https://publicacionexterna.azurewebsites.net/publicaciones/places"

   data_places <- read_xml(link2)

   # Look every field up relative to its own <place> node. xml_find_first()
   # returns exactly one result per input node (a missing node, read as NA,
   # when the tag is absent), so all columns keep the same length and stay
   # aligned with the place they belong to.
   place_nodes <- xml_find_all(data_places, "//place")

   datos_id <- xml_attr(place_nodes, "place_id")
   datos_name <- xml_text(xml_find_first(place_nodes, ".//name"))
   datos_brand <- xml_text(xml_find_first(place_nodes, ".//brand"))
   datos_cre_id <- xml_text(xml_find_first(place_nodes, ".//cre_id"))
   datos_category <- xml_text(xml_find_first(place_nodes, ".//category"))
   datos_adress_street <- xml_text(
     xml_find_first(place_nodes, ".//address_street")
   )
   # <x> is stored as the longitude and <y> as the latitude
   datos_longitud <- xml_text(xml_find_first(place_nodes, ".//x"))
   datos_latitud <- xml_text(xml_find_first(place_nodes, ".//y"))

   datosDeLugares <- data.frame(
     datos_id, datos_name,
     datos_brand, datos_cre_id,
     datos_category, datos_adress_street,
     datos_latitud, datos_longitud
   )

   colnames(datosDeLugares) <- c(
     "place_id", "name", "brand", "cre_id",
     "category", "adress_street", "Latitude", "Longitude"
   )

   # Drop the intermediate objects; only the two data frames and the links
   # are kept
   rm(
     data_prices, places, values, xmlfile, data_places, place_nodes,
     datos_adress_street, datos_brand, datos_category, datos_cre_id,
     datos_id, datos_name, datos_longitud, datos_latitud
   )

   # results / results2 are not created by the code shown here; remove them
   # only when they exist so that rm() does not warn about missing objects
   rm(list = intersect(c("results", "results2"), ls()))

The code for getting the address information is the following.

# Make place_id numeric in both tables so the join keys have the same type.
# as.character() comes first because as.numeric() applied directly to a
# factor returns the level codes instead of the ids. The tables are already
# data frames, so they are not passed through data.frame() again (piping a
# data frame into data.frame(itself) would duplicate every column).
datosDePrecios <- datosDePrecios %>%
  mutate(place_id = as.numeric(as.character(place_id)))

datosDeLugares <- datosDeLugares %>%
  mutate(place_id = as.numeric(as.character(place_id)))

# Attach the place information to every price row that has a matching place.
# NOTE(review): a place with several price rows appears several times here,
# so the same coordinates may be geocoded more than once - confirm whether
# that is intended.
baseGeneral <- inner_join(datosDeLugares, datosDePrecios, by = "place_id")

# Keep only what the geocoder needs, with the coordinates as numbers
baseGeneral <- baseGeneral %>%
  select(Latitude, Longitude, place_id) %>%
  mutate(
    Latitude = as.numeric(as.character(Latitude)),
    Longitude = as.numeric(as.character(Longitude))
  )

# At most the first 100 rows; unlike [1:100, ], head() does not pad the
# result with NA rows when fewer rows are available
baseGeneral <- head(baseGeneral, 100)

# Reverse-geocode each row; the result replaces baseGeneral with a list that
# holds one API response per row. `key` must contain the Google API key.
baseGeneral <- apply(baseGeneral, 1, function(x) {
  google_reverse_geocode(
    location = c(x["Latitude"], x["Longitude"]),
    key = key,
    result_type = "street_address"
  )
})

Thank you for your help. :)

like image 726
José Eduardo Jaramillo Barrera Avatar asked Mar 08 '23 12:03

José Eduardo Jaramillo Barrera


1 Answers

You can extract information from lists using either [[ notation, or $

If I take the example given in ?google_reverse_geocode to get a result

library(googleway)

## reverse-geocode a single lat/lon pair, asking only for street addresses
## with rooftop location type
res <- google_reverse_geocode(
  location = c(-37.81659, 144.9841),
  result_type = "street_address",
  location_type = "rooftop",
  key = key
)

The lat/lon information is in res$results$geometry$location

The formatted address is in res$results$formatted_address

And the place_id is in res$results$place_id

So you can create your data.frame from these elements

## build the data.frame from the pieces of res$results named above
local({
  hits <- res$results
  coords <- hits$geometry$location
  data.frame(
    lat = coords$lat,
    lon = coords$lng,
    formatted_address = hits$formatted_address,
    place_id = hits$place_id
  )
})

If you had multiple lists of results, then the process is similar, but you need to wrap it in an *apply function (or whatever looping mechanism you prefer)

## the lat/lon pairs to look up
locations <- list(
  c(-37.81659, 144.9841),
  c(-37.81827, 144.9671)
)

## one reverse-geocoding response per location, in the same order
lst_res <- lapply(locations, function(loc) {
  google_reverse_geocode(location = loc, key = key)
})

Here, lst_res is a list of all the results from the geocoding function, so you can iterate over it to extract the relevant parts

## turn every response into a data.frame with the four fields of interest
lst_df <- lapply(lst_res, function(response) {
  hits <- response[["results"]]
  coords <- hits[["geometry"]][["location"]]
  data.frame(
    lat = coords[["lat"]],
    lon = coords[["lng"]],
    formatted_address = hits[["formatted_address"]],
    place_id = hits[["place_id"]]
  )
})

Here, lst_df is a list of data.frames. If you want to join them into one single data.frame, you can bind them row-wise with do.call() and rbind():

## stack the per-location data.frames row-wise into one data.frame
df <- do.call(rbind, lst_df)

## et voila!
## (printed output of head(df) follows; the columns wrap onto separate groups
## of lines)
head(df)
# lat      lon
# 1 -37.81647 144.9841
# 2 -37.81659 144.9841
# 3 -37.81300 144.9850
# 4 -37.81363 144.9631
# 5 -37.81614 144.9805
# 6 -37.81005 144.9281
# formatted_address
# 1 Jolimont Station, 175 Wellington Parade, East Melbourne VIC 3002, Australia
# 2       Jolimont Station, Wellington Cres, East Melbourne VIC 3002, Australia
# 3                                          East Melbourne VIC 3002, Australia
# 4                                                    Melbourne VIC, Australia
# 5                                          East Melbourne VIC 3002, Australia
# 6                                                   Melbourne, VIC, Australia
# place_id
# 1 ChIJSxAubOpC1moRqhRUnMoZV4M
# 2 ChIJIdtrbupC1moRMPT0CXZWBB0
# 3 ChIJz25SvMFC1moRAOiMIXVWBAU
# 4 ChIJ90260rVG1moRkM2MIXVWBAQ
# 5 ChIJG74w4Upd1moRsDQuRnhWBBw
# 6 ChIJv_FYgkNd1moRpxLuRXZURFs
like image 191
SymbolixAU Avatar answered Mar 11 '23 12:03

SymbolixAU