Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Split date rows by new year

Tags:

I have data from a hospital with many variables, and also from and to dates for each row, which tells us when each row is "valid". Each row can maximum be valid for a year.

test = data.frame(ID=c(10,10,10,12,12), Disease=c("P","P","P","D","P"), Pass=c("US","US","US","EN","EN"),
                  Payment=c(110,110,115,240,255), 
                  from_date=as.POSIXct(c("2008-01-09","2009-01-09","2010-01-09","2008-01-01","2013-12-31")),
                  to_date=as.POSIXct(c("2009-01-08","2010-01-08","2011-01-08","2008-12-31","2014-12-30"))
                  )

For the rows that pass from one year to another, I want to split up the rows, such that I end up with two rows instead of the original row, and also manipulate the from_date and to_date, such that I end up with a new dataset looking like this:

  test_desired = data.frame(ID=c(10,10,10,10,10,10,12,12,12), Disease=c("P","P","P","P","P","P","D","P","P"), Pass=c("US","US","US","US","US","US","EN","EN","EN"),
                              Payment=c(110,110,110,110,115,115,240,255,255), 
                              from_date=as.POSIXct(c("2008-01-09","2009-01-01","2009-01-09","2009-01-01","2010-01-09","2011-01-01","2008-01-01","2013-12-31","2014-01-01")),
                              to_date=as.POSIXct(c("2008-12-31","2009-01-08","2009-12-31","2010-01-08","2010-12-31","2011-01-08","2008-12-31","2013-12-31","2014-12-30"))
    )    

Attempt:

library(lubridate) #for function "year" below
test_desired=test
row=c()
tmp=c()
for(i in 1:nrow(test_desired)){
  if(year(test_desired$from_date)[i]<year(test_desired$to_date)[i]){
    test_desired$to_date[i] = as.POSIXct(paste0(year(test_desired$from_date[i]),"-12-31"))
    row = test_desired[i,]
    row$from_date = as.POSIXct(paste0(year(test$to_date[i]),"-01-01"))
    row$to_date = test$to_date[i]
    tmp=rbind(tmp,row)

  } else next
}
test_desired=rbind(test_desired,tmp)
library(dplyr)
test_desired=arrange(test_desired,ID,from_date)

Is there a more elegant way of doing this, for example with dplyr?