I want to replicate the below formula R using dplyr
+ lag function. The code works till the 2nd row of each group and then onward gives me 0s
forecast = lag(value,1)*(1-lag(Attrition)/52)
Conditions:
I am getting 0's from 3rd row onward. Below is my code for reproducing.
data <- data %>% group_by(Patch) %>% mutate(id = row_number())
data <- data %>% group_by(Patch) %>% mutate(forecast = lag(Value,1)*(1-lag(Attrition,1)/52))
tbl_df(data)
# A tibble: 12 x 6
Patch Week Value Attrition id forecast
<chr> <date> <dbl> <dbl> <int> <dbl>
1 11P11 2021-06-14 2 0.075 1 NA
2 11P11 2021-06-21 0 0.075 2 2.00
3 11P11 2021-06-28 0 0.075 3 0
4 11P12 2021-06-14 3 0.075 1 NA
5 11P12 2021-06-21 0 0.075 2 3.00
6 11P12 2021-06-28 0 0.075 3 0
7 11P12 2021-07-05 0 0.075 4 0
8 11P13 2021-06-14 1 0.075 1 NA
9 11P13 2021-06-21 0 0.075 2 0.999
10 11P13 2021-06-28 0 0.075 3 0
11 11P13 2021-07-05 0 0.075 4 0
12 11P13 2021-07-12 0 0.075 5 0
> dput(data)
structure(list(Patch = c("11P11", "11P11", "11P11", "11P12",
"11P12", "11P12", "11P12", "11P13", "11P13", "11P13", "11P13",
"11P13"), Week = structure(c(18792, 18799, 18806, 18792, 18799,
18806, 18813, 18792, 18799, 18806, 18813, 18820), class = "Date"),
Value = c(2, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 0), Attrition = c(0.075,
0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075,
0.075, 0.075), id = c(1L, 2L, 3L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 5L), forecast = c(NA, 1.99711538461538, 0, NA, 2.99567307692308,
0, 0, NA, 0.998557692307692, 0, 0, 0)), row.names = c(NA,
-12L), groups = structure(list(Patch = c("11P11", "11P12", "11P13"
), .rows = structure(list(1:3, 4:7, 8:12), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -3L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
Updated Solution
Here is a simple solution using base::Reduce
:
do.call(rbind, lapply(split(df, df$Patch), function(x) {
x$forecast <- c(NA, Reduce(function(a, b) {
a * (1 - (x$Attrition[b]/52))
}, 2:(nrow(x)-1), init = x$Value[1], accumulate = TRUE))
x
}))
Patch Week Value Attrition id forecast
1 11P11 2021-06-14 2 0.075 1 NA
2 11P11 2021-06-21 0 0.075 2 2.0000000
3 11P11 2021-06-28 0 0.075 3 1.9971154
4 11P12 2021-06-14 3 0.075 1 NA
5 11P12 2021-06-21 0 0.075 2 3.0000000
6 11P12 2021-06-28 0 0.075 3 2.9956731
7 11P12 2021-07-05 0 0.075 4 2.9913524
8 11P13 2021-06-14 1 0.075 1 NA
9 11P13 2021-06-21 0 0.075 2 1.0000000
10 11P13 2021-06-28 0 0.075 3 0.9985577
11 11P13 2021-07-05 0 0.075 4 0.9971175
12 11P13 2021-07-12 0 0.075 5 0.9956793
Earlier Approach
You can also use the following approach. For this I first applied your formula with mutate on your data set to get the first value of my forecast
series. Then I sliced the first rows of each group that contains NA
values for forecast
out. After that I used accumulate
function to calculate your desired series using first forecast
value as the value for .init
argument. Then I bind the resulting data set with the one containing NA
values:
library(dplyr)
library(purrr)
df %>%
group_by(Patch) %>%
mutate(forecast = lag(Value)*(1-(lag(Attrition)/52))) %>%
filter(between(row_number(), 2, n())) %>%
mutate(forecast = accumulate(Attrition[-1], .init = forecast[1], ~ ..1 * (1-(..2/52)))) %>%
bind_rows(df %>% group_by(Patch) %>%
mutate(forecast = lag(Value)*(1-(lag(Attrition)/52))) %>%
slice_head()) %>%
ungroup() %>%
arrange(Patch, Week)
# A tibble: 12 x 6
Patch Week Value Attrition id forecast
<chr> <date> <dbl> <dbl> <int> <dbl>
1 11P11 2021-06-14 2 0.075 1 NA
2 11P11 2021-06-21 0 0.075 2 2.00
3 11P11 2021-06-28 0 0.075 3 1.99
4 11P12 2021-06-14 3 0.075 1 NA
5 11P12 2021-06-21 0 0.075 2 3.00
6 11P12 2021-06-28 0 0.075 3 2.99
7 11P12 2021-07-05 0 0.075 4 2.99
8 11P13 2021-06-14 1 0.075 1 NA
9 11P13 2021-06-21 0 0.075 2 0.999
10 11P13 2021-06-28 0 0.075 3 0.997
11 11P13 2021-07-05 0 0.075 4 0.996
12 11P13 2021-07-12 0 0.075 5 0.994
What's tricky about this is that you need to consecutively build the forecast
variable, which is why it won't work in a standard mutate()
call.
Here is my approach that relies on purrr
's map()
and reduce()
for data consolidation:
library(tidyverse)
data %>%
mutate(forecast = NA) %>%
split(~ Patch) %>%
map(~ .x %>%
pmap(~ tibble(...)) %>%
reduce(\(.x, .y) {
prev <- slice_tail(.x)
base_value <- ifelse(prev$Value != 0, prev$Value, prev$forecast)
bind_rows(.x,
mutate(.y,
forecast = base_value * 1 - prev$Attrition / 5))
})) %>%
reduce(bind_rows)
Returns:
# A tibble: 12 x 6
Patch Week Value Attrition id forecast
<chr> <date> <dbl> <dbl> <int> <dbl>
1 11P11 2021-06-14 2 0.075 1 NA
2 11P11 2021-06-21 0 0.075 2 1.98
3 11P11 2021-06-28 0 0.075 3 1.97
4 11P12 2021-06-14 3 0.075 1 NA
5 11P12 2021-06-21 0 0.075 2 2.98
6 11P12 2021-06-28 0 0.075 3 2.97
7 11P12 2021-07-05 0 0.075 4 2.95
8 11P13 2021-06-14 1 0.075 1 NA
9 11P13 2021-06-21 0 0.075 2 0.985
10 11P13 2021-06-28 0 0.075 3 0.97
11 11P13 2021-07-05 0 0.075 4 0.955
12 11P13 2021-07-12 0 0.075 5 0.94
Data used:
data <- structure(list(Patch = c("11P11", "11P11", "11P11", "11P12", "11P12", "11P12", "11P12", "11P13", "11P13", "11P13", "11P13", "11P13"), Week = structure(c(18792, 18799, 18806, 18792, 18799, 18806, 18813, 18792, 18799, 18806, 18813, 18820), class = "Date"), Value = c(2, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 0), Attrition = c(0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075), id = c(1L, 2L, 3L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 5L), forecast = c(NA, 1.99711538461538, 0, NA, 2.99567307692308, 0, 0, NA, 0.998557692307692, 0, 0, 0)), row.names = c(NA, -12L), groups = structure(list(Patch = c("11P11", "11P12", "11P13"), .rows = structure(list(1:3, 4:7, 8:12), ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr", "list"))), row.names = c(NA, -3L), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", "tbl_df", "tbl", "data.frame"))
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With