Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Cumulative Count Paste

Tags:

r

dplyr

I have this dataset:

  ID Set Type Count
1  1   1    A    NA
2  2   1    R    NA
3  3   1    R    NA
4  4   1    U    NA
5  5   1    U    NA
6  6   1    U    NA
7  7   2    A    NA
8  8   3    R    NA
9  9   3    R    NA

As dputs:

# Input data as dput() output: 9 rows with columns ID, Set, Type (a factor
# with levels "A", "R", "U") and Count, which is NA in every row.
mystart <- structure(list(ID = 1:9, Set = c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 
3L, 3L), Type = structure(c(1L, 2L, 2L, 3L, 3L, 3L, 1L, 2L, 2L
), .Label = c("A", "R", "U"), class = "factor"), Count = c(NA, 
NA, NA, NA, NA, NA, NA, NA, NA)), .Names = c("ID", "Set", "Type", 
"Count"), class = "data.frame", row.names = c(NA, -9L))

Using the dplyr package, how can I obtain this:

  ID Set Type  Count
1  1   1    A     A1
2  2   1    R   A1R1
3  3   1    R   A1R2
4  4   1    U A1R2U1
5  5   1    U A1R2U2
6  6   1    U A1R2U3
7  7   2    A     A1
8  8   3    R     R1
9  9   3    R     R2

Again dputs:

# Desired result as dput() output: same ID, Set and Type columns as the input;
# Count is a factor whose labels are "A1", "A1R1", "A1R2", "A1R2U1", "A1R2U2",
# "A1R2U3", "R1" and "R2".
myend <- structure(list(ID = 1:9, Set = c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 
3L, 3L), Type = structure(c(1L, 2L, 2L, 3L, 3L, 3L, 1L, 2L, 2L
), .Label = c("A", "R", "U"), class = "factor"), Count = structure(c(1L, 
2L, 3L, 4L, 5L, 6L, 1L, 7L, 8L), .Label = c("A1", "A1R1", "A1R2", 
"A1R2U1", "A1R2U2", "A1R2U3", "R1", "R2"), class = "factor")), .Names = c("ID", 
"Set", "Type", "Count"), class = "data.frame", row.names = c(NA, 
-9L))


In short, within each value of the column "Set" I want a running count of each value of the column "Type", and I want these counts pasted together as text, cumulatively, in "Count".

After looking at similar posts, I got close with this:

# Result of the attempt so far, as dput() output: same ID, Set and Type
# columns; Count holds the integers 1, 1, 2, 1, 2, 3, 1, 1, 2 rather than the
# pasted text labels.
myend <- structure(list(ID = 1:9, Set = c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 
3L, 3L), Type = structure(c(1L, 2L, 2L, 3L, 3L, 3L, 1L, 2L, 2L
), .Label = c("A", "R", "U"), class = "factor"), Count = c(1L, 
1L, 2L, 1L, 2L, 3L, 1L, 1L, 2L)), .Names = c("ID", "Set", "Type", 
"Count"), class = "data.frame", row.names = c(NA, -9L))

With the code:

library(dplyr)

# Read the raw table from disk; fill = TRUE lets rows with missing trailing
# fields be padded instead of aborting the read.
myend <- read.table("mydata.txt", header = TRUE, fill = TRUE)

# Number the rows 1, 2, 3, ... within each Set/Type combination.
# ungroup() is called without arguments: in a pipe the data already arrives as
# the first argument, and any further argument is interpreted as a grouping
# column to drop, so passing `myend` again is an error in current dplyr.
# seq_len(n()) is used instead of seq(n()) so the sequence is always 1..n.
myend %>%
  group_by(Set, Type) %>%
  mutate(Count = seq_len(n())) %>%
  ungroup()

Thank you very much for your help,

like image 624
user193037 Avatar asked Apr 02 '18 09:04

user193037


2 Answers

Base R version :

aggregateGroup <- function(x){
  # Build, for every position of x, a label made of the running tallies of the
  # values seen so far, in order of first appearance (e.g. "A1", "A1R1",
  # "A1R2", "A1R2U1", ...).
  #
  # x:       a vector (character or factor); it is converted with as.character().
  # returns: a character vector with one label per element of x.
  labels <- as.character(x)

  # tally is a named integer vector: names are the distinct values seen so
  # far, values are how many times each has occurred.
  tally <- integer(0)
  out <- character(length(labels))

  for (i in seq_along(labels)) {
    key <- labels[i]
    # tally[key] is NA the first time a value shows up; na.rm = TRUE turns
    # that into "start at 1", otherwise the existing count is incremented.
    tally[key] <- sum(tally[key], 1L, na.rm = TRUE)
    # Glue name/count pairs together without separators, e.g. A=1, R=2 -> "A1R2".
    out[i] <- paste0(names(tally), tally, collapse = '')
  }

  out
}

# Label each Set separately with aggregateGroup(), then put the per-Set
# results back into the original row order.
labels_by_set <- lapply(split(mystart$Type, mystart$Set), aggregateGroup)
mystart$Count <- unsplit(labels_by_set, mystart$Set)

> mystart
  ID Set Type  Count
1  1   1    A     A1
2  2   1    R   A1R1
3  3   1    R   A1R2
4  4   1    U A1R2U1
5  5   1    U A1R2U2
6  6   1    U A1R2U3
7  7   2    A     A1
8  8   3    R     R1
9  9   3    R     R2
like image 122
digEmAll Avatar answered Nov 05 '22 12:11

digEmAll


A dplyr version:

# Within each Set, keep a running count of every Type and render the three
# counts as one "A<n>R<n>U<n>" string per row (zero counts are kept).
mystart %>%
  group_by(Set) %>%
  mutate(
    Count = sprintf(
      "A%dR%dU%d",
      cumsum(Type == "A"),
      cumsum(Type == "R"),
      cumsum(Type == "U")
    )
  ) %>%
  ungroup()

Which yields

# A tibble: 9 x 4
     ID   Set Type  Count 
  <int> <int> <chr> <chr> 
1     1     1 A     A1R0U0
2     2     1 R     A1R1U0
3     3     1 R     A1R2U0
4     4     1 U     A1R2U1
5     5     1 U     A1R2U2
6     6     1 U     A1R2U3
7     7     2 A     A1R0U0
8     8     3 R     A0R1U0
9     9     3 R     A0R2U0


If you want to omit the variables whose count is zero, you need to wrap a helper function around it, like so:
mygroup <- function(lst) {
  # Turn a named list of count vectors into one label per position, skipping
  # every name whose count is zero at that position.
  #
  # lst:     a named list of equal-length numeric vectors, e.g.
  #          list(A = c(1, 1), R = c(0, 1)).
  # returns: a character vector, e.g. c("A1", "A1R1").
  labels <- names(lst)
  pieces <- vector("list", length(lst))

  for (k in seq_along(lst)) {
    counts <- lst[[k]]
    # A zero count contributes an empty string; anything else contributes
    # "<name><count>".
    pieces[[k]] <- ifelse(counts == 0, "", paste0(labels[k], counts))
  }

  # Concatenate the pieces element-wise across all names.
  do.call("paste0", pieces)
}

# Same running counts per Set as before, but rendered through mygroup() so
# that types with a zero count are left out of the label.
type_levels <- c(A = "A", R = "R", U = "U")

mystart %>%
  group_by(Set) %>%
  mutate(
    Count = mygroup(
      # One cumulative count vector per type, named A, R and U.
      lapply(type_levels, function(lvl) cumsum(Type == lvl))
    )
  ) %>%
  ungroup()

This yields

# A tibble: 9 x 4
     ID   Set Type  Count 
  <int> <int> <chr> <chr> 
1     1     1 A     A1    
2     2     1 R     A1R1  
3     3     1 R     A1R2  
4     4     1 U     A1R2U1
5     5     1 U     A1R2U2
6     6     1 U     A1R2U3
7     7     2 A     A1    
8     8     3 R     R1    
9     9     3 R     R2  
like image 26
Jan Avatar answered Nov 05 '22 11:11

Jan