Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Calculating minutes per day given the start and stop times

Tags:

time

r

How does one extract the minutes from a time interval so that the number of minutes from each date is returned?

I have a dataframe with the case ID, start & stop times, and the total elapsed time interval in minutes. I need to figure out how many minutes were on each date for each case.

This is somewhat similar to this question but I was unable to transfer that approach to this situation.

The original data set is structured like this, however the final solution will need to scale up to allow many more cases (see alternative dataset below).

original data

>so
  Case               Start                Stop   Minutes
1    A 2013-01-10 18:00:00 2013-01-10 20:30:00  150 mins
2    B 2013-01-15 22:45:00 2013-01-16 00:15:00   90 mins
3    C 2013-01-20 22:00:00 2013-01-22 02:00:00 1680 mins
4    D 2013-01-27 12:00:00 2013-01-28 06:00:00 1080 mins
5    E 2013-01-01 00:00:00 2013-01-01 02:00:00  120 mins
6    F 2013-01-02 08:00:00 2013-01-03 07:00:00 1380 mins

desired output

  Case       Date Minutes
1    A 2013-01-10     150
2    B 2013-01-15      75
3    B 2013-01-16      15
4    C 2013-01-20     120
5    C 2013-01-21    1440
6    C 2013-01-22     120
7    D 2013-01-27     720
8    D 2013-01-28     360
9    E 2013-01-01     120
10   F 2013-01-02     960 
11   F 2013-01-03     420

DPUT of original data

> dput(so)  
structure(list(  
Case = structure(1:5, .Label = c("A", "B", "C", "D", "E"), class = "factor"), 
Start = structure(c(2L, 3L, 4L, 5L, 1L), .Label = c("2013-01-01 00:00:00", "2013-01-10 18:00:00", "2013-01-15 22:45:00", "2013-01-20 22:00:00", "2013-01-27 12:00:00"), class = "factor"),  
Stop = structure(c(2L, 3L, 4L, 5L, 1L), .Label = c("2013-01-01 02:00:00", "2013-01-10 20:30:00", "2013-01-16 00:15:00", "2013-01-22 02:00:00", "2013-01-28 06:00:00"), class = "factor"), 
Minutes = structure(c(150, 90, 1680, 1080, 120), tzone = "", units = "mins", class = "difftime")
), 
.Names = c("Case","Start", "Stop", "Minutes"), 
row.names = c(NA, -5L), 
class = "data.frame")  

DPUT of alternative larger data set

> dput(so_alt)
structure(list(Case = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L), .Label = c("A", "B", "C", "D", "E", "F", "G", "H"), class = "factor"),     Start = structure(c(12L, 13L, 4L, 5L, 19L, 20L, 21L, 30L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 9L, 6L, 7L, 8L, 3L, 10L, 11L, 14L, 15L, 16L), .Label = c("2012-10-25 18:10:00", "2012-11-01 20:18:00", "2012-11-16 17:45:00", "2012-12-06 01:00:00", "2012-12-06 08:00:00", "2012-12-26 13:15:00", "2012-12-29 19:50:00", "2012-12-30 00:00:00", "2013-01-01 01:46:00", "2013-01-10 20:15:00", "2013-01-11 09:00:00", "2013-01-29 17:00:00", "2013-02-05 21:30:00", "2013-02-21 01:50:00", "2013-02-21 09:25:00", "2013-02-21 12:20:00", "2013-02-22 21:45:00", "2013-02-24 15:30:00", "2013-03-01 10:10:00", "2013-03-06 16:15:00", "2013-03-07 20:00:00", "2013-03-12 21:00:00", "2013-03-13 05:15:00", "2013-03-14 00:45:00", "2013-03-14 11:30:00", "2013-03-15 21:00:00", "2013-03-16 08:15:00", "2013-03-17 06:45:00", "2013-03-18 18:04:00", "2013-03-21 21:40:00"), class = "factor"), Stop = structure(c(12L, 13L, 4L, 5L, 19L, 20L, 21L, 30L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 9L, 6L, 7L, 8L, 3L, 10L, 11L, 14L, 15L, 16L), .Label = c("2012-10-25 21:30:00", "2012-11-03 08:00:00", "2012-11-17 08:45:00", "2012-12-06 05:00:00", "2012-12-06 09:30:00", "2012-12-29 19:50:00", "2012-12-30 00:00:00", "2012-12-31 20:00:00", "2013-01-02 06:00:00", "2013-01-10 22:13:00", "2013-01-12 03:30:00", "2013-01-30 07:45:00", "2013-02-06 07:30:00", "2013-02-21 09:25:00", "2013-02-21 12:20:00", "2013-02-22 11:30:00", "2013-02-24 10:15:00", "2013-02-24 16:45:00", "2013-03-06 07:45:00", "2013-03-07 07:30:00", "2013-03-08 08:00:00", "2013-03-13 05:00:00", "2013-03-14 00:45:00", "2013-03-14 10:15:00", "2013-03-14 19:00:00", "2013-03-15 22:36:00", "2013-03-16 19:00:00", "2013-03-18 15:15:00", "2013-03-18 22:00:00", "2013-03-22 08:30:00"), class = "factor"), Minutes = structure(c(885, 600, 240, 90, 7055, 915, 720, 650, 480, 1170, 570, 450, 96, 645, 1950, 236, 1694, 4715, 250, 2640, 900, 118, 1110, 455, 175, 1390), class = "difftime", units = "mins")), .Names = c("Case", "Start", "Stop", "Minutes"), row.names = c(NA, 26L), class = "data.frame")  
>  
like image 771
Rollie Avatar asked Oct 22 '22 06:10

Rollie


2 Answers

Here is a way.

library(plyr)

ddply(ddply(so, 1, function(x) {
    d1 <- as.Date(x$Start)
    d2 <- as.Date(x$Stop)
    if (d1 == d2) data.frame(date = d1, min = x$Minutes)
    else {
        ret <- rbind(data.frame(date=d1, min = difftime(as.POSIXct(paste(d1+1, "00:00:00")), as.POSIXct(x$Start), units = "mins")), 
        data.frame(date=d2, min=difftime(as.POSIXct(x$Stop), as.POSIXct(paste(d2, "00:00:00")), units = "mins")))
        if (d2-d1>1) {
            ret <- rbind(ret, data.frame(date = seq(d1+1, d2-1, by = "day"), min = 60*24))
        }
        ret
    }
}), .(Case, date), summarise, min = sum(min))

Output is:

  Case       date       min
1    A 2013-01-10  150 mins
2    B 2013-01-15   75 mins
3    B 2013-01-16   15 mins
4    C 2013-01-20  120 mins
5    C 2013-01-21 1440 mins
6    C 2013-01-22  120 mins

The final version here:

ddply(ddply(data.frame(.id = 1:nrow(so_alt), so_alt), .(.id, Case), function(x) {
    d1 <- as.Date(x$Start)
    d2 <- as.Date(x$Stop)
    if (d1 == d2) data.frame(date = d1, min = x$Minutes)
    else {
        ret <- rbind(data.frame(date=d1, min = difftime(as.POSIXct(paste(d1+1, "00:00:00")), as.POSIXct(x$Start), units = "mins")), 
        data.frame(date=d2, min=difftime(as.POSIXct(x$Stop), as.POSIXct(paste(d2, "00:00:00")), units = "mins")))
        if (d2-d1>1) {
            ret <- rbind(ret, data.frame(date = seq(d1+1, d2-1, by = "day"), min = 60*24))
        }
        ret
    }
}), .(Case, date), summarise, min = sum(min))

and output:

   Case       date       min
1     A 2013-01-01 1334 mins
2     A 2013-01-02  360 mins
3     A 2013-01-29  420 mins
4     A 2013-01-30  465 mins
5     A 2013-02-21  175 mins
6     A 2013-03-12  180 mins
7     A 2013-03-13  300 mins
8     B 2012-12-26  645 mins
9     B 2012-12-27 1440 mins
10    B 2012-12-28 1440 mins
11    B 2012-12-29 1190 mins
12    B 2013-02-05  150 mins
13    B 2013-02-06  450 mins
14    B 2013-02-21  700 mins
15    B 2013-02-22  690 mins
16    B 2013-03-13 1125 mins
17    B 2013-03-14   45 mins
18    C 2012-12-06  240 mins
19    C 2012-12-29  250 mins
20    C 2012-12-30    0 mins
21    C 2013-03-14  570 mins
22    D 2012-12-06   90 mins
23    D 2012-12-30 1440 mins
24    D 2012-12-31 1200 mins
25    D 2013-03-14  450 mins
26    E 2012-11-16  375 mins
27    E 2012-11-17  525 mins
28    E 2013-03-01  830 mins
29    E 2013-03-02 1440 mins
30    E 2013-03-03 1440 mins
31    E 2013-03-04 1440 mins
32    E 2013-03-05 1440 mins
33    E 2013-03-06  465 mins
34    E 2013-03-15   96 mins
35    F 2013-01-10  118 mins
36    F 2013-03-06  465 mins
37    F 2013-03-07  450 mins
38    F 2013-03-16  645 mins
39    G 2013-01-11  900 mins
40    G 2013-01-12  210 mins
41    G 2013-03-07  240 mins
42    G 2013-03-08  480 mins
43    G 2013-03-17 1035 mins
44    G 2013-03-18  915 mins
45    H 2013-02-21  455 mins
46    H 2013-03-18  236 mins
47    H 2013-03-21  140 mins
48    H 2013-03-22  510 mins
like image 184
kohske Avatar answered Oct 24 '22 02:10

kohske


Here a data.table solution. The idea is quite simple(maybe not very efficient): Create a vector of minutes for each Case, transform it to a vector of days and compute the frequency using table.

DT <- as.data.table(dat)
DT[,list(seqD= seq(as.Date(Start),as.Date(Stop),'day'),
         duration= { 
           seq.minus <- seq(as.POSIXct(Start),
                                   as.POSIXct(Stop),'min')[-1]
           as.data.frame(table(format(seq.minus,'%Y-%m-%d')))$Freq
           }),
   by = list(Case)]

   Case       seqD duration
1:    A 2013-01-10      150
2:    B 2013-01-15       74
3:    B 2013-01-16       16
4:    C 2013-01-20      119
5:    C 2013-01-21     1440
6:    C 2013-01-22      121
like image 20
agstudy Avatar answered Oct 24 '22 01:10

agstudy