Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Add new variable (column) in the fly to a reactive dataframe in Shiny

Tags:

dataframe

r

shiny

I am trying to add in a reactive dataframe the outpust of both a non-linear regression model and a multivariate analysis. I managed to create the reactive dataframe which is updated anytime I filter my data. I now want to update the model outputs whenever I filter the dataframe and add the prediction values of the model to the reactive dataframe. Below is a subset of the dataset I am using as well as the ui and server files I use to create the shiny App.

Load package

library (shiny)
library(ggvis)
library(dplyr)
library(rbokeh)
library (minpack.lm)
library (hydroGOF)
library(caret)

The dataframe I use:

 Flux_Data_df<- structure(list(Site_ID = structure(c(1L, 3L, 5L, 7L, 8L), .Label = c("AR-Slu", 
"AR-Vir", "AU-Tum", "AU-Wac", "BE-Bra", "BE-Jal", "BE-Vie", "BR-Cax", 
"BR-Ma2", "BR-Sa1", "BR-Sa3", "BW-Ma1", "CA-Ca1", "CA-Ca2", "CA-Ca3", 
"CA-Gro", "Ca-Man", "CA-NS1", "CA-NS2", "CA-NS3", "CA-NS4", "CA-NS5", 
"CA-NS6", "CA-NS7", "CA-Oas", "CA-Obs", "CA-Ojp", "CA-Qcu", "CA-Qfo", 
"CA-SF1", "CA-SF2", "CA-SF3", "CA-SJ1", "CA-SJ2", "CA-SJ3", "CA-TP1", 
"CA-TP2", "CA-TP3", "CA-TP4", "CA-Wp1", "CN-Bed", "CN-Cha", "CN-Din", 
"CN-Ku1", "CN-Qia", "CZ-Bk1", "De-Bay", "DE-Hai", "DE-Har", "DE-Lkb", 
"DE-Meh", "DE-Obe", "DE-Tha", "DE-Wet", "DK-Sor", "ES-Es1", "FI-Hyy", 
"FI-Sod", "FR-Fon", "FR-Hes", "FR-Lbr", "FR-Pue", "GF-Guy", "ID-Pag", 
"IL-Yat", "IS-Gun", "IT-Col", "IT-Cpz", "IT-Lav", "IT-Lma", "IT-Noe", 
"IT-Non", "IT-Pt1", "IT-Ro1", "IT-Ro2", "IT-Sro", "JP-Tak", "JP-Tef", 
"JP-Tom", "MY-Pso", "NL-Loo", "PA-Spn", "PT-Esp", "RU-Fyo", "RU-Skp", 
"RU-Zot", "SE-Abi", "SE-Fla", "SE-Nor", "SE-Sk1", "SE-Sk2", "SE-St1", 
"UK-Gri", "UK-Ham", "US-Bar", "US-Blo", "US-Bn1", "US-Bn2", "Us-Bn3", 
"US-Dk2", "US-Dk3", "US-Fmf", "US-Fuf", "US-Fwf", "US-Ha1", "US-Ha2", 
"US-Ho1", "US-Ho2", "US-Lph", "US-Me1", "US-Me3", "US-Me4", "US-Me6", 
"US-Moz", "US-NC1", "US-Nc2", "US-NR1", "US-Oho", "US-So2", "US-So3", 
"US-Sp1", "US-Sp2", "US-Sp3", "US-Syv", "US-Umb", "US-Wbw", "US-Wcr", 
"US-Wi0", "US-Wi1", "US-Wi2", "US-Wi4", "US-Wi8", "VU-Coc", "CA-Cbo", 
"CN-Lao", "ID-Buk", "JP-Fuj", "RU-Ab", "RU-Be", "RU-Mix"), class = "factor"), 
    Ecosystem = structure(c(5L, 3L, 5L, 5L, 3L), .Label = c("DBF", 
    "DNF", "EBF", "ENF", "MF", "SHB", "WSA"), class = "factor"), 
    Climate = structure(c(3L, 3L, 3L, 3L, 4L), .Label = c("Arid", 
    "Continental", "Temperate", "Tropical"), class = "factor"), 
    Management = structure(c(4L, 2L, 3L, 4L, 4L), .Label = c("High", 
    "Low", "Moderate", "None"), class = "factor"), Stand_Age = c(50, 
    99, 77.0833333333333, 66.2, 97), NEP = c(1262.24986565392, 
    251.665998718498, 89.590110051402, 467.821910494384, 560), 
    GPP = c(2437.9937774539, 1837.82835206203, 1353.91140903122, 
    1740.68843840394, 3630), NEP_GPP = c(0.517741217113419, 0.143353622997247, 
    0.0760076059028116, 0.270737440100469, 0.1542699725), Uncert = c(7.29706486170583, 
    12.3483066698996, 7.59406340226036, 8.2523670901841, 12.1
    ), Gap_filled = c(0.953310527540233, 0.969648973753497, 0.9395474605477, 
    0.923408280276339, 1), MAT = c(19.0438821722383, 9.67003296799878, 
    10.7728316162948, 8.2796213684244, 27.341666667), MAT_An = c(-0.0413522012578611, 
    0.840055031446541, 0.705896226415094, 0.805524109014675, 
    0.191666666666667), MAT_Trend = c(0.0119577487502016, 0.0196238509917756, 
    0.0305871364833632, 0.0381007095629741, 0.0194619147449338
    ), MAP = c(351.700001291931, 1107.49999958277, 844.158499979666, 
    998.205467054248, 2279.5), MAP_CRU = c(592.2, 850.925, 852.591666666667, 
    1098.98, 2279.5), SPI_CRU_Mean = c(-0.352735702252502, 0.188298093749456, 
    0.0830157542916604, 0.397632136136383, 1.31028809089487), 
    MAP_An = c(4.14188988095238, -15.8198660714286, 5.39074900793651, 
    2.28799107142857, 1.55565476190476), MAP_Trend = c(1.38787584993337, 
    0.147192657259031, 0.747167885331603, 0.104885622031644, 
    0.841903850753408), CEC_Total_1km = c(14.05, 10.25, 17.975, 
    21, 9.95), Clay_Silt = c(36.65, 42.125, 32.275, 55, 54.825
    ), Clay_1km = c(26.425, 31.425, 11.25, 22.45, 38.075), Silt_1km = c(10.225, 
    10.7, 21.025, 32.55, 16.75), Sand_1km = c(63.35, 57.325, 
    67.65, 45, 45.275), NOy = c(1.73752826416889, 2.76055219091326, 
    4.96187381895543, 5.06857284157762, 0.90948457442513), NHx = c(2.50363534311763, 
    2.99675999696687, 11.2747222582845, 13.9207300067467, 1.53292533883169
    ), Soil_C_1km = c(3.6, 17, 23.575, 26.65, 8.15), Lat = c(-33.4648, 
    -35.6566, 51.3092, 50.3051, -1.72000003), Long = c(-66.4598, 
    148.1516, 4.5206, 5.9981, -51.4500008)), .Names = c("Site_ID", 
"Ecosystem", "Climate", "Management", "Stand_Age", "NEP", "GPP", 
"NEP_GPP", "Uncert", "Gap_filled", "MAT", "MAT_An", "MAT_Trend", 
"MAP", "MAP_CRU", "SPI_CRU_Mean", "MAP_An", "MAP_Trend", "CEC_Total_1km", 
"Clay_Silt", "Clay_1km", "Silt_1km", "Sand_1km", "NOy", "NHx", 
"Soil_C_1km", "Lat", "Long"), row.names = c(NA, 5L), class = "data.frame")

Choose x and y variable to choose

axis_vars <- c(
  "NEP observed [gC.m-2.y-1]" = "NEP",
  "NEP predicted [gC.m-2.y-1]" = "prediction",
  "CUEe" = "NEP_GPP",
  "GPP [gC.m-2.y-1]" = "GPP",
  "Forest Age [years]" = "Stand_Age",
  "MAT [°C]" = "MAT",
  "SPI" = "SPI_CRU_Mean",
  "MAP [mm.y-1]" = "MAP",
  "MAP trend [mm.y-1]" = "MAP_Trend",
  "MAT tremd [°C.y-1]" = "MAT_Trend",
  "Clay content [kg.kg-1]" = "Clay_1km",
  "N deposition [kg N.ha-1.y-1]" = "NHx"
)

The ui file:

        ui<- actionLink <- function(inputId, ...) {
  tags$a(href='javascript:void',
         id=inputId,
         class='action-button',
         ...)
}


shinyUI(fluidPage(
  titlePanel("Data exploration"),
  p('Interactive tool for data exploration'),
  em('by, ', a('Simon Besnard',  href = 'http://www.bgc-jena.mpg.de/bgi/index.php/People/SimonBesnard')),

  fluidRow(
    column(4,
           wellPanel(
             selectInput("xvar", "X-axis variable", axis_vars, selected = "Stand_Age"),
             selectInput("yvar", "Y-axis variable", axis_vars, selected = "NEP")
           ),

           wellPanel(
             h4("Filter data"),

             sliderInput("Gap_filled", "Fraction gap filling", 0, 1, value = c(0, 1)),
             sliderInput("Uncert", "Uncertainties", 0, 45, value = c(0, 45),
                         step = 1),
             sliderInput("Stand_Age", "Forest age [years]", 0, 400, value = c(0, 400),
                         0, 400, 400, step = 5),
             sliderInput("GPP", "GPP [gC.m-2.y-1]", 0, 4000, value = c(0, 4000),
                         0, 4000, 4000, step = 100),
             sliderInput("MAT", "MAT [°C]", -10, 30, value = c(-10, 30),
                         -10, 30, 30, step = 1),
             sliderInput("MAP", "MAP [mm.y-1]", 0, 4000, value = c(0, 4000),
                         0, 4000, 400, step = 100),
             checkboxGroupInput("Management", "Intensity of management",  c("None", "Low", "Moderate", "High"),
                                selected= c("None", "Low", "Moderate", "High"), inline = T),
             checkboxGroupInput("Climate", "Type of climate",
                                     c("Arid", "Continental", "Temperate", "Tropical"),
                                selected=c("Arid", "Continental", "Temperate", "Tropical"), inline=T),
             checkboxGroupInput("Ecosystem",
                          label="PFTs",
                          choices=list("DBF", "DNF", "EBF", "ENF", "MF",  "SHB"),
                          selected=c("DBF", "DNF", "EBF", "ENF", "MF","SHB"), inline=T)
           )),

    mainPanel(
      navlistPanel(
        tabPanel("Plot", rbokehOutput("rbokeh")),
        tabPanel("Statistics", tableOutput("summaryTable")),
        tabPanel("Variable importance", plotOutput("Var_Imp")),
        tabPanel("Spatial distribution - Flux tower", rbokehOutput("Map_Site"))
      ),
      downloadLink('downloadData', 'Download'))
    ))
)

And the server file:

         server<- shinyServer(function(input, output, session) {


# A reactive expression for filtering dataframe
Update_df <- reactive({

  # Lables for axes
  xvar_name <- names(axis_vars)[axis_vars == input$xvar]
  yvar_name <- names(axis_vars)[axis_vars == input$yvar]
  xvar <- prop("x", as.symbol(input$xvar))
  yvar <- prop("y", as.symbol(input$yvar))

  Flux_Data_df %>%
    filter(
      Gap_filled >= input$Gap_filled[1] & 
      Gap_filled <= input$Gap_filled[2] &
      Uncert > input$Uncert[1] & 
      Uncert < input$Uncert[2] &
      Stand_Age >= input$Stand_Age[1] & 
      Stand_Age <= input$Stand_Age[2] &
      GPP > input$GPP[1] & 
      GPP < input$GPP[2] &
      MAT > input$MAT[1] & 
      MAT < input$MAT[2] &
      MAP > input$MAP[1] & 
      MAP < input$MAP[2]) %>%
    filter(
      Management %in% input$Management &
      Climate %in% input$Climate &
      Ecosystem %in% input$Ecosystem) %>% as.data.frame()
})


# A reactive expression to add model predicion to a new dataframe
Update_df<- reactive({
    for(id in unique(Update_df()$Site_ID)){
      lm.Age<- try(nlsLM(NEP~offset + A*(1-exp(k*Stand_Age)), data = Update_df()[Update_df()$Site_ID != id,], 
                         start = list(A=  711.5423, k= -0.2987, offset= -444.2672),
                         lower= c(A = -Inf, k =  -Inf, offset= -1500), control = list(maxiter = 500), weights = 1/Uncert), silent=TRUE);
      Update_df()$f_Age[Update_df()$Site_ID == id] <- predict(object = lm.Age, newdata = Update_df()[Update_df()$Site_ID == id,])
    } %>% as.data.frame()
})


#Plot scatter plot
output$rbokeh <-  renderRbokeh({
  plot_data<- Update_df()
g<- figure() %>%
    ly_points(x = input$xvar, y = input$yvar, data=plot_data, hover= c(Site_ID, year)) %>%
  x_axis("x", label = names(axis_vars)[axis_vars == input$xvar]) %>%
  y_axis("y", label = names(axis_vars)[axis_vars == input$yvar])
return(g)
})


output$Map_Site <- renderRbokeh({
  plot_data<- Update_df()
  p<- gmap(lat=0, lng=0, zoom = 2, width = 600, height = 600, map_type ="hybrid") %>%
  ly_points(x=Long, y=Lat, data = plot_data, hover= c(Site_ID), col = "red", size=5) %>%
    tool_box_select() %>%
    tool_lasso_select() %>%
    tool_reset()
  return(p)
})


output$downloadData <- downloadHandler(
   filename = function() {
     paste('data-', Sys.Date(), '.csv', sep='')
   },
   content = function(con) {
     write.csv(data, con)
   }
)


})

shinyApp(ui, server)

Basically, I would like to add a prediction column to the updated dataframe anytime a filtering action is done in the shiny app based on the filtering set-up in the ui file. Anyone can help me out with it?

like image 305
SimonB Avatar asked Jun 09 '16 21:06

SimonB


1 Answers

Here is the way the server.R file should be done:

# Provide R code to build the object.
shinyServer(function(input, output, session) {


# A reactive expression for filtering dataframe
Update_df1 <- reactive({
  Flux_Data_df %>%
    filter(
      Gap_filled >= input$Gap_filled[1] & 
      Gap_filled <= input$Gap_filled[2] &
      Uncert > input$Uncert[1] & 
      Uncert < input$Uncert[2] &
      Stand_Age >= input$Stand_Age[1] & 
      Stand_Age <= input$Stand_Age[2] &
      GPP > input$GPP[1] & 
      GPP < input$GPP[2] &
      MAT > input$MAT[1] & 
      MAT < input$MAT[2] &
      MAP > input$MAP[1] & 
      MAP < input$MAP[2]) %>%
    filter(
      Management %in% input$Management &
      Disturbance %in% input$Disturbance &
      Climate %in% input$Climate &
      Ecosystem %in% input$Ecosystem) %>% as.data.frame()
})

# A reactive expression to add model predicion to a new dataframe
Age<-reactive({
  prediction<- Update_df1()
  for(id in unique(prediction$Site_ID)){
    lm_Age<- try(nlsLM(NEP~offset + A*(1-exp(k*Stand_Age)), data = prediction[prediction$Site_ID != id,], 
                       start = list(A=  711.5423, k= -0.2987, offset= -444.2672),
                       lower= c(A = -Inf, k =  -Inf, offset= -1500), control = list(maxiter = 500), weights = 1/Uncert), silent=TRUE)
    prediction$f_Age[prediction$Site_ID == id] <- predict(object = lm_Age, newdata =  prediction[prediction$Site_ID == id,]) 
  }
  return(prediction)
})

Final_df<-reactive({
  df<- Age()
  for(id in unique(df$Site_ID)){
    lm_NEP<- lm(NEP~ (f_Age + Stand_Age + GPP)^2 +
                  Clay_1km + GPP:MAP + SPI_CRU_Mean:NHx + Stand_Age:NHx,
                data = df[df$Site_ID != id,], weights = 1/Uncert)
    df$prediction[df$Site_ID == id] <- predict(object = lm_NEP, newdata = df[df$Site_ID == id,]) 
  }
  return(df)
})

Model_Performance<- reactive({
  Stat<- data.frame(matrix(ncol = 3, nrow = 1))
  colnames(Stat)<- c("R2", "MEF", "RMSE")
  Stat$R2<- round(cor(Final_df()$prediction, Final_df()$NEP, use="complete")^2, digits = 2)
  Stat$RMSE <- round(rmse(Final_df()$prediction, Final_df()$NEP), digits = 2)
  Stat$MEF<-round(NSE(Final_df()$prediction, Final_df()$NEP, na.rm=TRUE), digits=2)
  return(Stat)
})

Var_Imp<- reactive({
  Imp<- data.frame(matrix(ncol = 7, nrow = 1))
  colnames(Imp)<- c("Age", "GPP*Age", "GPP*MAP", "Clay content", "Ndepo*SPI", "GPP", "Ndepo*Age")
  VarImp_NEP<- varImp(lm(NEP ~ (f_Age + Stand_Age + GPP)^2 +
                           Clay_1km + GPP:MAP + SPI_CRU_Mean:NHx + Stand_Age:NHx,
                         data=Final_df(), weights = 1/Uncert))
  Imp$Age<- (VarImp_NEP$Overall[1] + VarImp_NEP$Overall[2] + VarImp_NEP$Overall[5])/ sum(VarImp_NEP$Overall)
  Imp["GPP*Age"]<- (VarImp_NEP$Overall[6] + VarImp_NEP$Overall[7])/ sum(VarImp_NEP$Overall) 
  Imp["GPP*MAP"]<- VarImp_NEP$Overall[8]/ sum(VarImp_NEP$Overall) 
  Imp["Clay content"]<- VarImp_NEP$Overall[4]/ sum(VarImp_NEP$Overall) 
  Imp["Ndepo*SPI"]<- VarImp_NEP$Overall[9]/ sum(VarImp_NEP$Overall)
  Imp["GPP"]<- VarImp_NEP$Overall[3]/ sum(VarImp_NEP$Overall)
  Imp["Ndepo*Age"]<- VarImp_NEP$Overall[10]/ sum(VarImp_NEP$Overall)
  Imp<- gather(Imp)
  colnames(Imp)<- c("Variable", "Percentage")
  Imp$Percentage<- round(Imp$Percentage*100, digits = 1)
  return(Imp)
})

#Plot Univariate
output$Univariate <-  renderRbokeh({
  plot_data<- Final_df()
  plot_data$Stand_Age<- round(plot_data$Stand_Age, digits = 0)
  plot_data$Stand_Age<- round(plot_data$Stand_Age, digits = 0)
g<- figure() %>%
    ly_points(x = input$xvar, y = input$yvar, data=plot_data, hover= c(Site_ID, Stand_Age)) %>%
  x_axis("x", label = names(axis_vars)[axis_vars == input$xvar]) %>%
  y_axis("y", label = names(axis_vars)[axis_vars == input$yvar])
return(g)
})

#Plot model performance
output$Model_perf <-  renderRbokeh({
  plot_data<- Final_df()
  plot_data$Stand_Age<- round(plot_data$Stand_Age, digits = 0)
  g<- figure() %>%
    ly_points(x = prediction, y = NEP, data=plot_data, hover= c(Site_ID, Stand_Age, Ecosystem)) %>%
    ly_abline(a=0, b=1) %>%
    x_axis("NEP predicted [gC.m-2.y-1]") %>%
    y_axis("NEP observed [gC.m-2.y-1]") %>%
    x_range(c(-700, 1500)) %>%
    y_range(c(-700, 1500))
  return(g)
})

#Plot Variable importance
output$Var_Imp <-  renderRbokeh({
  plot_data<- Var_Imp()
  g<- figure() %>%
    ly_points(x =Percentage, y = Variable, data=plot_data, hover= c(Percentage)) %>%
    x_axis("Percentage [%]") %>%
    y_axis("")
  return(g)
})

output$Map_Site <- renderRbokeh({
  plot_data<- Final_df()
  plot_data$Stand_Age<- round(plot_data$Stand_Age, digits = 0)
  p<- gmap(lat=0, lng=0, zoom = 2, width = 600, height = 1000, map_type ="hybrid") %>%
  ly_points(x=Long, y=Lat, data = plot_data, hover= c(Site_ID, Stand_Age), col = "red", size=5) %>%
    tool_box_select() %>%
    tool_lasso_select() %>%
    tool_reset() %>%
    tool_resize()
  return(p)
})

output$Update_data = renderDataTable({
  Final_df()
})

output$Summary_Table = renderDataTable({
  Model_Performance()

})

output$downloadData <- downloadHandler(
  filename = function() {paste('Updated.csv', sep='') },
  content = function(file) {
    write.csv(Final_df(), file)
  }
)

})
like image 94
SimonB Avatar answered Nov 01 '22 00:11

SimonB