How to do a crosstab with two categorical variables but populate it with the mean of the third variable

Tags:

library(ggplot2)
data(diamonds)
str(diamonds)
## 'data.frame':    53940 obs. of  10 variables:
##  $ carat  : num  0.23 0.21 0.23 0.29 0.31 0.24 0.24 0.26 0.22 0.23 ...
##  $ cut    : Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
##  $ color  : Ord.factor w/ 7 levels "D"<"E"<"F"<"G"<..: 2 2 2 6 7 7 6 5 2 5 ...
##  $ clarity: Ord.factor w/ 8 levels "I1"<"SI2"<"SI1"<..: 2 3 5 4 2 6 7 3 4 5 ...
##  $ depth  : num  61.5 59.8 56.9 62.4 63.3 62.8 62.3 61.9 65.1 59.4 ...
##  $ table  : num  55 61 65 58 58 57 57 55 61 61 ...
##  $ price  : int  326 326 327 334 335 336 336 337 337 338 ...
##  $ x      : num  3.95 3.89 4.05 4.2 4.34 3.94 3.95 4.07 3.87 4 ...
##  $ y      : num  3.98 3.84 4.07 4.23 4.35 3.96 3.98 4.11 3.78 4.05 ...
##  $ z      : num  2.43 2.31 2.31 2.63 2.75 2.48 2.47 2.53 2.49 2.39 ...

This is my crosstab

table(diamonds$cut,diamonds$color)
##            
##                D    E    F    G    H    I    J
##   Fair       163  224  312  314  303  175  119
##   Good       662  933  909  871  702  522  307
##   Very Good 1513 2400 2164 2299 1824 1204  678
##   Premium   1603 2337 2331 2924 2360 1428  808
##   Ideal     2834 3903 3826 4884 3115 2093  896

But instead of getting the counts I want the average price (or mean(price) or even max(price)

I tried the Hmisc package but it gives me data in the longer format I needed it to the in the table format above

  summarize(diamonds$price,llist(diamonds$color,diamonds$clarity),max)

    ##    diamonds$color diamonds$clarity diamonds$price
## 1               D               I1          15964
## 4               D              SI2          18693
## 3               D              SI1          18468
## 6               D              VS2          18318
## 5               D              VS1          17936
## 8               D             VVS2          17545
## 7               D             VVS1          17932
## 2               D               IF          18542
## 9               E               I1          11548
## 12              E              SI2          18477
## 11              E              SI1          18731
## 14              E              VS2          18557
## 13              E              VS1          18729
## 16              E             VVS2          18188
## 15              E             VVS1          16256
## 10              E               IF          18700
## 17              F               I1          10685
## 20              F              SI2          18784
## 19              F              SI1          18759
## 22              F              VS2          18791
## 21              F              VS1          18780
## 24              F             VVS2          18614
## 23              F             VVS1          18777
## 18              F               IF          18552
## 25              G               I1          13203
## 28              G              SI2          18804
## 27              G              SI1          18818
## 30              G              VS2          18700
## 29              G              VS1          18419
## 32              G             VVS2          18768
## 31              G             VVS1          18445
## 26              G               IF          18806
## 33              H               I1          17329
## 36              H              SI2          18745
## 35              H              SI1          18803
## 38              H              VS2          18659
## 37              H              VS1          18522
## 40              H             VVS2          17267
## 39              H             VVS1          14603
## 34              H               IF          16300
## 41              I               I1          16193
## 44              I              SI2          18756
## 43              I              SI1          18797
## 46              I              VS2          18823
## 45              I              VS1          18795
## 48              I             VVS2          15952
## 47              I             VVS1          15654
## 42              I               IF          12725
## 49              J               I1          18531
## 52              J              SI2          18710
## 51              J              SI1          18508
## 54              J              VS2          18701
## 53              J              VS1          18706
## 56              J             VVS2          17214
## 55              J             VVS1          17891
## 50              J               IF          18594

730

asked Jun 28 '15 13:06

Ajay Ohri

1 Answers

Try

library(reshape2)
acast(diamonds, cut~color, value.var='price', mean)
#                D        E        F        G        H        I        J
#Fair      4291.061 3682.312 3827.003 4239.255 5135.683 4685.446 4975.655
#Good      3405.382 3423.644 3495.750 4123.482 4276.255 5078.533 4574.173
#Very Good 3470.467 3214.652 3778.820 3872.754 4535.390 5255.880 5103.513
#Premium   3631.293 3538.914 4324.890 4500.742 5216.707 5946.181 6294.592
#Ideal     2629.095 2597.550 3374.939 3720.706 3889.335 4451.970 4918.186

Or using base R

 with(diamonds, tapply(price, list(cut,color), FUN= mean))
 #                 D        E        F        G        H        I        J
 #Fair      4291.061 3682.312 3827.003 4239.255 5135.683 4685.446 4975.655
 #Good      3405.382 3423.644 3495.750 4123.482 4276.255 5078.533 4574.173
 #Very Good 3470.467 3214.652 3778.820 3872.754 4535.390 5255.880 5103.513
 #Premium   3631.293 3538.914 4324.890 4500.742 5216.707 5946.181 6294.592
 #Ideal     2629.095 2597.550 3374.939 3720.706 3889.335 4451.970 4918.186

Or as @DavidArenburg suggested

  xtabs(price ~ cut + color, diamonds)/table(diamonds[c('cut', 'color')])
  #            color
  #cut                D        E        F        G        H        I        J
  #Fair      4291.061 3682.312 3827.003 4239.255 5135.683 4685.446 4975.655
  #Good      3405.382 3423.644 3495.750 4123.482 4276.255 5078.533 4574.173
  #Very Good 3470.467 3214.652 3778.820 3872.754 4535.390 5255.880 5103.513
  #Premium   3631.293 3538.914 4324.890 4500.742 5216.707 5946.181 6294.592
  #Ideal     2629.095 2597.550 3374.939 3720.706 3889.335 4451.970 4918.186

You may also try dcast from the devel version of data.table i.e. v1.9.5.

 library(data.table)
 dcast(as.data.table(diamonds), cut~color, value.var='price', mean)

If the grouping variables are 'clarity' and 'color'

 with(diamonds, tapply(price, list(clarity,color), FUN = mean))

For other functions, change the FUN in tapply or fun.aggregate in acast/dcast

168

answered Oct 05 '22 23:10

akrun

Related questions
                            
                                read.csv Read Specific Row [duplicate]
                            
                                Blend of na.omit and na.pass using aggregate?
                            
                                Split Data Frame into Rows of Fixed Size
                            
                                how to put percentage label in ggplot when geom_text is not suitable?
                            
                                R sapply is.factor
                            
                                nvd3 scatterPlot with rCharts in R: Vary size of points?
                            
                                Extract R^2 from quantile regression / summary()
                            
                                ggplot2 density histogram with custom bin edges
                            
                                How to remove the margins from a ggmap
                            
                                Format Date (Year-Month) in R [duplicate]
                            
                                R shiny using iframe for local files
                            
                                R testthat package: How can I see output from message() when using test_file()
                            
                                How to set different global options in knitr and RStudio for word and html?
                            
                                extend geom_smooth in a single direction
                            
                                Creating asymmetric layouts involving rows and column in Shiny
                            
                                Switch statement is not working for numerical objects
                            
                                Wanted: repeated-pictogram visualization of population split
                            
                                R - graph frequency of observations over time with small value range
                            
                                Finding dot product in r
                            
                                R function with FOR loop on a vector

Donate For Us

If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!

Donate Us With

How to do a crosstab with two categorical variables but populate it with the mean of the third variable

Tags:

r

crosstab

hmisc

Ajay Ohri

People also ask

1 Answers

akrun

Recent Activity

Donate For Us