choices combination,order & tree

Tags:

ggplot2

I have the following data, which represents the sequence of each person's choices among four values (f1, f2, c1, c2):

df=structure(list(combi = structure(c(24L, 8L, 3L, 19L, 4L, 23L, 
15L, 12L, 14L, 22L, 5L, 13L, 18L, 9L, 2L, 25L, 11L, 7L, 21L, 
10L, 6L, 17L, 20L, 16L), .Label = c("", "c1-c2-f1-f2", "c1-c2-f2-f1", 
"c1-f1-c2-f2", "c1-f1-f2-c2", "c1-f2-c2-f1", "c1-f2-f1-c2", "c2-c1-f1-f2", 
"c2-c1-f2-f1", "c2-f1-c1-f2", "c2-f1-f2-c1", "c2-f2-c1-f1", "c2-f2-f1-c1", 
"f1-c1-c2-f2", "f1-c1-f2-c2", "f1-c2-c1-f2", "f1-c2-f2-c1", "f1-f2-c1-c2", 
"f1-f2-c2-c1", "f2-c1-c2-f1", "f2-c1-f1-c2", "f2-c2-c1-f1", "f2-c2-f1-c1", 
"f2-f1-c1-c2", "f2-f1-c2-c1"), class = "factor"), nb = c(10L, 
0L, 2L, 4L, 1L, 5L, 1L, 2L, 1L, 3L, 1L, 0L, 3L, 5L, 0L, 18L, 
5L, 2L, 5L, 0L, 4L, 4L, 11L, 2L)), .Names = c("combi", "nb"), class = "data.frame", row.names = c(1L, 
3L, 5L, 7L, 9L, 11L, 13L, 15L, 17L, 19L, 21L, 23L, 25L, 27L, 
29L, 31L, 33L, 35L, 37L, 39L, 41L, 43L, 45L, 47L))

I'm wondering whether there is a tree representation (or something else) that could quantify the number of choices at each step, taking into account the sub-chains that are common to several sequences. Example:

f2  (52) -f1 (28)  -c1-c2  (10)
                   -c2-c1  (18)

f2 (52): there are 52 chains beginning with f2. f1 (28): there are 28 chains beginning with f2-f1.

Thanks a lot.

545

asked Mar 13 '18 13:03

2 Answers

If you read the combi values in (using as.character) you can expand those values to character columns:

df2 <-  cbind(df, read.table(text=as.character(df$combi), sep="-",stringsAsFactors=FALSE)  )

Then you can tabulate at whatever level you want:

 xtabs(nb~V1, data=df2) # First level only
#V1
#c1 c2 f1 f2 
#10 12 15 52 

xtabs(nb~paste(V1,V2,sep="-"), data=df2) # first and second
#--
# paste(V1, V2, sep = "-")
#c1-c2 c1-f1 c1-f2 c2-c1 c2-f1 c2-f2 f1-c1 f1-c2 f1-f2 f2-c1 f2-c2 f2-f1 
#    2     2     6     5     5     2     2     6     7    16     8    28

You can also use the addmargins function to compactly display the sub-totals for the two "most senior" positions:

 addmargins( xtabs(nb~V1+V2, data=df2))
 #=========
     V2
V1    c1 c2 f1 f2 Sum
  c1   0  2  2  6  10
  c2   5  0  5  2  12
  f1   2  6  0  7  15
  f2  16  8 28  0  52
  Sum 23 16 35 15  89

This could be "flattened" with ftable:

 ftable( addmargins( xtabs(nb~V1+V2, data=df2)), row.vars=1:2)
V1  V2     
c1  c1    0
    c2    2
    f1    2
    f2    6
    Sum  10
c2  c1    5
    c2    0
    f1    5
    f2    2
    Sum  12
f1  c1    2
    c2    6
    f1    0
    f2    7
    Sum  15
f2  c1   16
    c2    8
    f1   28
    f2    0
    Sum  52
Sum c1   23
    c2   16
    f1   35
    f2   15
    Sum  89

And the final tally would be:

xtabs(nb~paste(V1,V2,V3,V4,sep="-"), data=df2)
#-----
paste(V1, V2, V3, V4, sep = "-")
c1-c2-f1-f2 c1-c2-f2-f1 c1-f1-c2-f2 c1-f1-f2-c2 c1-f2-c2-f1 c1-f2-f1-c2 c2-c1-f1-f2 c2-c1-f2-f1 
          0           2           1           1           4           2           0           5 
c2-f1-c1-f2 c2-f1-f2-c1 c2-f2-c1-f1 c2-f2-f1-c1 f1-c1-c2-f2 f1-c1-f2-c2 f1-c2-c1-f2 f1-c2-f2-c1 
          0           5           2           0           1           1           2           4 
f1-f2-c1-c2 f1-f2-c2-c1 f2-c1-c2-f1 f2-c1-f1-c2 f2-c2-c1-f1 f2-c2-f1-c1 f2-f1-c1-c2 f2-f1-c2-c1 
          3           4          11           5           3           5          10          18

To see it all in a column:

as.matrix( xtabs(nb~paste(V1,V2,V3,V4,sep="-"), data=df2) )
#----------------
            [,1]
c1-c2-f1-f2    0
c1-c2-f2-f1    2
c1-f1-c2-f2    1
c1-f1-f2-c2    1
c1-f2-c2-f1    4
c1-f2-f1-c2    2
c2-c1-f1-f2    0
c2-c1-f2-f1    5
c2-f1-c1-f2    0
c2-f1-f2-c1    5
c2-f2-c1-f1    2
c2-f2-f1-c1    0
f1-c1-c2-f2    1
f1-c1-f2-c2    1
f1-c2-c1-f2    2
f1-c2-f2-c1    4
f1-f2-c1-c2    3
f1-f2-c2-c1    4
f2-c1-c2-f1   11
f2-c1-f1-c2    5
f2-c2-c1-f1    3
f2-c2-f1-c1    5
f2-f1-c1-c2   10
f2-f1-c2-c1   18

I suppose a "final answer" with all the subtotals might be:

 ftable( addmargins( xtabs(nb~V1+V2+paste(V3,V4,sep="-"), data=df2)), row.vars=1:3)

However, that has so many zero entries that I hesitate to recommend it. You could strip out the zero rows:

my.ftable <- ftable( addmargins( xtabs(nb~V1+V2+paste(V3,V4,sep="-"), data=df2)), row.vars=1:3)
my.df.table <- as.data.frame(my.ftable)
names(my.df.table)[3] <- "3rd_4th"
my.df.table[ my.df.table$Freq > 0,  ]
#---------
     V1  V2 3rd_4th Freq
14   f2  f1   c1-c2   10
15  Sum  f1   c1-c2   10
18   f1  f2   c1-c2    3
20  Sum  f2   c1-c2    3
23   f1 Sum   c1-c2    3
24   f2 Sum   c1-c2   10
25  Sum Sum   c1-c2   13
34   f2  c2   c1-f1    3
35  Sum  c2   c1-f1    3
42   c2  f2   c1-f1    2
45  Sum  f2   c1-f1    2
47   c2 Sum   c1-f1    2
49   f2 Sum   c1-f1    3
50  Sum Sum   c1-f1    5
# and many more rows
#...  until
321  c1 Sum     Sum   10
322  c2 Sum     Sum   12
323  f1 Sum     Sum   15
324  f2 Sum     Sum   52
325 Sum Sum     Sum   89

103

answered Oct 03 '22 00:10

The data.tree package specialises in tree representation. It is based on splitting variables in a hierarchical order, for example world -> continent -> country -> city. In your case, you've listed every ordering of c1, c2, f1 and f2. You would likely need to build four trees, one per starting value (e.g. c1 --> either c2, f1 or f2, each leading to the two unused values), and then plot them.

A basic example that starts with c1 and then branches out, without including the specific values (counts):

library(data.tree)
c1 <- Node$new("c1")      # 1st level chain, "c1"
c2 <- c1$AddChild("c2")   # new 2nd level chain, "c2", off c1
f1 <- c2$AddChild("f1-f2")   # new level off c2
f2 <- c2$AddChild("f2-f1")   # new level off c2
f1 <- c1$AddChild("f1")   # new 2nd level chain, "f1", off c1
c2 <- f1$AddChild("c2-f2")   # new level off f1
f2 <- f1$AddChild("f2-c2")   # new level off f1
f2 <- c1$AddChild("f2")   # new 2nd level chain, "f2", off c1
c2 <- f2$AddChild("c2-f1")   # new level off f2
f1 <- f2$AddChild("f1-c2")   # new level off f2

print(c1)
       levelName
1  c1           
2   ¦--c2       
3   ¦   ¦--f1-f2
4   ¦   °--f2-f1
5   ¦--f1       
6   ¦   ¦--c2-f2
7   ¦   °--f2-c2
8   °--f2       
9       ¦--c2-f1
10      °--f1-c2

plot(c1)

enter image description here

answered Oct 03 '22 00:10

MBorg

Related questions
                            
                                filter empty rows from a dataframe with R
                            
                                R: Increase my rvest scraper's speed?
                            
                                Count of unique elements of each row in a data frame in R
                            
                                Disable button in shiny while plot is loading
                            
                                How to install Caret package? While installing, I am getting this message
                            
                                How to detect a blank input for a date in Shiny
                            
                                R | ggplot2 | (remove tick marks + remove panel border) but keep axis lines
                            
                                Quick replace of NA - an error or warning
                            
                                Correlations for pairs of combinations
                            
                                R ggplot2 annotate with subscript and tilde
                            
                                backports 1.1.1 package fails to install
                            
                                Align shared legends to the center of the plot grid (with cowplot)
                            
                                How to rasterize a single layer of a ggplot?
                            
                                R Studio installing stringi fails
                            
                                How do I change a named vector to a data frame retaining the names?
                            
                                glm in python vs R
                            
                                How to call a function for each row of a data.frame?
                            
                                Remove rows in data.table according to another data.table
                            
                                Neatest way to build a data frame from a list of lists in R
                            
                                officer package function for adding an R plot to a presentation

Donate For Us

If you love us, you can donate via PayPal or buy us a coffee so we can maintain and grow the site. Thank you!

Donate Us With

choices combination,order & tree

Tags:

r

ggplot2

ranell

People also ask

2 Answers

IRTFM

MBorg

Recent Activity

Donate For Us