complete.test <- tibble(col1 = c("a", "a", "b", "b"),
col2 = c(as.Date("2019-01-01"),
as.Date("2019-01-02"),
as.Date("2019-01-03"),
as.Date("2019-01-04")),
col3 = runif(4),
col4 = runif(4))
complete.test %>% complete(col1, col2)
#> # A tibble: 8 x 4
#> col1 col2 col3 col4
#> <chr> <date> <dbl> <dbl>
#> 1 a 2019-01-01 0.154 0.143
#> 2 a 2019-01-02 0.746 0.526
#> 3 a 2019-01-03 NA NA
#> 4 a 2019-01-04 NA NA
#> 5 b 2019-01-01 NA NA
#> 6 b 2019-01-02 NA NA
#> 7 b 2019-01-03 0.997 0.772
#> 8 b 2019-01-04 0.989 0.460
The tidyr complete()
function is working like it always does above. But if we use the specific data set shown below it "stops" working. Maybe user error. Please read on.
library(tidyverse)
df <- structure(list(`Business Group` = c("ABC", "ABC", "ABC",
"ABC", "ABC", "ABC", "ABC", "ABC", "ABC",
"ABC", "DEF", "DEF", "DEF", "DEF", "DEF",
"DEF", "DEF", "DEF", "GHI", "GHI",
"GHI", "GHI", "GHI",
"GHI", "GHI", "GHI",
"GHI", "GHI", "GHI",
"GHI"), Month = structure(c(17866, 17897, 17928,
17956, 17987, 18017, 18048, 18078, 18109, 18140, 17956, 17987,
18017, 18048, 18078, 18109, 18140, 18170, 13970, 14000, 14031,
14061, 14092, 14123, 14153, 14184, 14214, 14245, 14276, 14304
), class = "Date"), SumChange = c(0, 0, 0, 1, 1, 1, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0, 0, 2, 0, -12, 3, 4, 3, 4,
3), `Qty Items Open 90 Days` = c(0, 0, 0, 1, 2, 3, 4, 5, 5, 5,
0, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0, 0, 2, 2, -10, -7, -3, 0, 4,
7)), row.names = c(NA, -30L), groups = structure(list(`Business Group` = c("ABC",
"DEF", "GHI"), .rows = list(1:10, 11:18, 19:30)), row.names = c(NA,
-3L), class = c("tbl_df", "tbl", "data.frame"), .drop = FALSE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
#> # A tibble: 30 x 4
#> # Groups: Business Group [3]
#> `Business Group` Month SumChange `Qty Items Open 90 Days`
#> <chr> <date> <dbl> <dbl>
#> 1 ABC 2018-12-01 0 0
#> 2 ABC 2019-01-01 0 0
#> 3 ABC 2019-02-01 0 0
#> 4 ABC 2019-03-01 1 1
#> 5 ABC 2019-04-01 1 2
#> 6 ABC 2019-05-01 1 3
#> 7 ABC 2019-06-01 1 4
#> 8 ABC 2019-07-01 1 5
#> 9 ABC 2019-08-01 0 5
#> 10 ABC 2019-09-01 0 5
#> 11 DEF 2019-03-01 0 0
#> 12 DEF 2019-04-01 0 0
#> 13 DEF 2019-05-01 0 0
#> 14 DEF 2019-06-01 0 0
#> 15 DEF 2019-07-01 0 0
#> 16 DEF 2019-08-01 0 0
#> 17 DEF 2019-09-01 0 0
#> 18 DEF 2019-10-01 -3 -3
#> 19 GHI 2008-04-01 0 0
#> 20 GHI 2008-05-01 0 0
#> 21 GHI 2008-06-01 0 0
#> 22 GHI 2008-07-01 0 0
#> 23 GHI 2008-08-01 2 2
#> 24 GHI 2008-09-01 0 2
#> 25 GHI 2008-10-01 -12 -10
#> 26 GHI 2008-11-01 3 -7
#> 27 GHI 2008-12-01 4 -3
#> 28 GHI 2009-01-01 3 0
#> 29 GHI 2009-02-01 4 4
#> 30 GHI 2009-03-01 3 7
You can see the data frame above consists of three groups "ABC", "DEF", and "GHI". I know the complete(`Business Group`, Month)
function isn't behaving as I'd expect because it's not completing the data frame with missing combinations of data for Business Group
and Month
. The "GHI" business group has dates back to 2009 that aren't being completed for groups "ABC" and "DEF". In addition nothing is being completed. Any idea what's wrong?
df %>% complete(`Business Group`, Month)
#> # A tibble: 30 x 4
#> # Groups: Business Group [3]
#> `Business Group` Month SumChange `Qty Items Open 90 Days`
#> <chr> <date> <dbl> <dbl>
#> 1 ABC 2018-12-01 0 0
#> 2 ABC 2019-01-01 0 0
#> 3 ABC 2019-02-01 0 0
#> 4 ABC 2019-03-01 1 1
#> 5 ABC 2019-04-01 1 2
#> 6 ABC 2019-05-01 1 3
#> 7 ABC 2019-06-01 1 4
#> 8 ABC 2019-07-01 1 5
#> 9 ABC 2019-08-01 0 5
#> 10 ABC 2019-09-01 0 5
#> 11 DEF 2019-03-01 0 0
#> 12 DEF 2019-04-01 0 0
#> 13 DEF 2019-05-01 0 0
#> 14 DEF 2019-06-01 0 0
#> 15 DEF 2019-07-01 0 0
#> 16 DEF 2019-08-01 0 0
#> 17 DEF 2019-09-01 0 0
#> 18 DEF 2019-10-01 -3 -3
#> 19 GHI 2008-04-01 0 0
#> 20 GHI 2008-05-01 0 0
#> 21 GHI 2008-06-01 0 0
#> 22 GHI 2008-07-01 0 0
#> 23 GHI 2008-08-01 2 2
#> 24 GHI 2008-09-01 0 2
#> 25 GHI 2008-10-01 -12 -10
#> 26 GHI 2008-11-01 3 -7
#> 27 GHI 2008-12-01 4 -3
#> 28 GHI 2009-01-01 3 0
#> 29 GHI 2009-02-01 4 4
#> 30 GHI 2009-03-01 3 7
It is a grouped tbl_df
library(dplyr)
library(tidyr)
df %>%
group_vars()
#[1] "Business Group"
ungroup
and it should work
df %>%
ungroup %>%
complete(`Business Group`, Month)
# A tibble: 69 x 4
# `Business Group` Month SumChange `Qty Items Open 90 Days`
# <chr> <date> <dbl> <dbl>
# 1 ABC 2008-04-01 NA NA
# 2 ABC 2008-05-01 NA NA
# 3 ABC 2008-06-01 NA NA
# 4 ABC 2008-07-01 NA NA
# 5 ABC 2008-08-01 NA NA
# 6 ABC 2008-09-01 NA NA
# 7 ABC 2008-10-01 NA NA
# 8 ABC 2008-11-01 NA NA
# 9 ABC 2008-12-01 NA NA
#10 ABC 2009-01-01 NA NA
# … with 59 more rows
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With