For each value determining if another column contains larger or smaller number

Tags:

r

I am looking at some data for bolts. If I had for example

 diameter     thread
 1            4
 1            6
 1            4
 2            5
 2            7
 3            9

I want a way to make a new column that tells me whether each row has the largest or the smallest thread for its diameter. There are never more than 2 thread sizes for each diameter; however, occasionally there is only 1, in which case I would like it to come out as large. For example:

diameter     thread    size
  1            4       small
  1            6       large
  1            4       small
  2            5       small
  2            7       large
  3            9       large

539

asked Jun 16 '16 15:06

ajamess

2 Answers

Quite easy using dplyr

# Load dplyr, which supplies group_by(), mutate() and the %>% pipe used below.
library(dplyr)

# Sample bolt measurements: one row per bolt.
data <- data.frame(
  diameter = c(1, 1, 2, 2, 3),
  thread = c(4, 6, 5, 7, 9)
)

# Within each diameter, tag rows whose thread equals that diameter's maximum
# as "large" and every other row as "small". A diameter with a single row is
# therefore tagged "large".
data %>%
  group_by(diameter) %>%
  mutate(size = ifelse(thread == max(thread), "large", "small"))

   diameter thread  size
      (dbl)  (dbl) (chr)
1        1      4 small
2        1      6 large
3        2      5 small
4        2      7 large
5        3      9 large

174

answered Oct 04 '22 01:10

ZachTurn

How about this (using base R):

# Label each row "large" when its thread is the largest within its own
# diameter, and "small" otherwise. `dt` must have `diameter` and `thread`
# columns.
dt$size <- "small"
# Largest thread per diameter: one row per distinct diameter.
group_max <- aggregate(thread ~ diameter, data = dt, FUN = max)
# Look up each row's own group maximum. Comparing against the pooled maxima of
# all diameters (thread %in% maxima) would wrongly flag a thread that is only
# the maximum of some *other* diameter.
row_max <- group_max$thread[match(dt$diameter, group_max$diameter)]
# which() drops NA comparisons, so such rows keep the "small" default.
dt$size[which(dt$thread == row_max)] <- "large"

OUTPUT

  diameter thread  size
1        1      4 small
2        1      6 large
3        1      4 small
4        2      5 small
5        2      7 large
6        3      9 large

DATA

# Example bolt data from the question: six bolts with integer diameter and
# thread values.
dt <- data.frame(
  diameter = c(1L, 1L, 1L, 2L, 2L, 3L),
  thread = c(4L, 6L, 4L, 5L, 7L, 9L)
)

BENCHMARK

library(dplyr)
library(microbenchmark)

# Benchmark input: the six-row bolt example (integer diameter and thread).
dt <- data.frame(
  diameter = c(1L, 1L, 1L, 2L, 2L, 3L),
  thread = c(4L, 6L, 4L, 5L, 7L, 9L)
)

# dplyr implementation.
#
# data: data frame with `diameter` and `thread` columns.
# Returns the result of the pipeline: `data` grouped by `diameter`, with an
# added `size` column that is "large" where thread equals the group maximum
# and "small" otherwise.
func_ZachTurn <- function(data) {
  data %>%
    group_by(diameter) %>%
    mutate(size = ifelse(thread == max(thread), "large", "small"))
}
# Base R implementation built on aggregate().
#
# dt: data frame with `diameter` and `thread` columns.
# Returns `dt` with an added character column `size`: "large" where the row's
# thread equals the maximum thread of its own diameter, "small" otherwise.
func_m0h3n <- function(dt) {
  dt$size <- "small"
  # Largest thread per diameter: one row per distinct diameter.
  group_max <- aggregate(thread ~ diameter, data = dt, FUN = max)
  # Match each row to the maximum of its own diameter. Testing
  # `thread %in% <all maxima>` would also flag a thread that is only the
  # maximum of a different diameter.
  row_max <- group_max$thread[match(dt$diameter, group_max$diameter)]
  # which() skips NA comparisons, leaving those rows at the "small" default.
  dt$size[which(dt$thread == row_max)] <- "large"
  dt
}
# data.table implementation.
#
# df: data frame with `diameter` and `thread` columns.
# Returns a data.table holding the columns of `df` plus a character column
# `size`: "large" where thread equals the maximum within its diameter,
# "small" otherwise.
func_Psidom <- function(df) {
  # Work on a copy: setDT() converts its argument by reference, which would
  # alter the caller's object (and every later use of it in a benchmark).
  out <- data.table::as.data.table(df)
  # (thread == max(thread)) + 1L is 1 for FALSE and 2 for TRUE, indexing into
  # c("small", "large") within each diameter group.
  out[,
    size := c("small", "large")[(thread == max(thread)) + 1L],
    by = .(diameter)
  ]
  # Trailing [] makes the result print after a := assignment.
  out[]
}
# Helper for ave(): maps one group's thread values to an index into
# c("small", "large"). A single-element group yields 2; otherwise elements
# equal to the group maximum yield 2 and all others yield 1.
f <- function(x) {
  at_max <- if (length(x) == 1) 1L else x == max(x)
  at_max + 1L
}

# Base R implementation using ave().
#
# dat: data frame with `diameter` and `thread` columns.
# Returns `dat` with an added character column `size` ("small"/"large").
func_docendo.discimus <- function(dat) {
  size_labels <- c("small", "large")
  # ave() applies f within each diameter and returns one value per row.
  label_index <- ave(dat$thread, dat$diameter, FUN = f)
  dat$size <- size_labels[label_index]
  dat
}
# Base R implementation using split()/unsplit().
#
# df: data frame with `diameter` and `thread` columns.
# Returns `df` with an added factor column `size` whose values are "large"
# where thread equals the maximum within its diameter and "small" otherwise.
func_Ernest.A <- function(df) {
  threads_by_diameter <- split(df$thread, df$diameter)
  labels_by_diameter <- lapply(
    threads_by_diameter,
    function(x) ifelse(x == max(x), 'large', 'small')
  )
  # unsplit() puts the per-group labels back into the original row order.
  df$size <- factor(unsplit(labels_by_diameter, df$diameter))
  df
}

# Reference result: the dplyr implementation applied to the sample data.
r <- func_ZachTurn(dt)
# Compare each other implementation's result with the reference, cell by cell.
# The commented line after each call is presumably the output the author saw.
all(r == func_m0h3n(dt))
# [1] TRUE
all(r == func_docendo.discimus(dt))
# [1] TRUE
all(r == func_Ernest.A(dt))
# [1] TRUE
# func_Psidom's result is converted with as.data.frame() before comparing.
all(r == as.data.frame(func_Psidom(dt)))
# [1] TRUE


# Time all five implementations on the same sample data.
microbenchmark(func_ZachTurn(dt), func_m0h3n(dt), func_docendo.discimus(dt), func_Ernest.A(dt), func_Psidom(dt))

# Unit: microseconds
                      # expr      min       lq      mean   median        uq      max neval
         # func_ZachTurn(dt) 3477.835 3609.147 3833.5482 3679.079 3860.6490 7136.169   100
            # func_m0h3n(dt) 4436.367 4601.042 4879.2726 4743.474 4859.8150 8578.031   100
 # func_docendo.discimus(dt)  854.168  923.673  999.2991  956.180  992.9645 4422.252   100
         # func_Ernest.A(dt) 1032.101 1086.636 1165.4361 1129.195 1167.9040 4882.057   100
           # func_Psidom(dt) 1537.245 1622.577 1731.0602 1678.822 1742.3395 5424.840   100

answered Oct 04 '22 01:10

989

Related questions
                            
                                Change color of specific tick in ggplot2
                            
                                How to create a conditional dummy in R?
                            
                                Create N random integers with no gaps
                            
                                Reading multiple JSON files in a directory into one Data Frame
                            
                                Find all possible substrings of length n
                            
                                How to include a header based on a condition in knitr
                            
                                Expand Data Frame
                            
                                Add a series of elements in different locations within a vector
                            
                                using dplyr's do() with summary()
                            
                                Are my R scripts identical?
                            
                                R indexing arrays. How to index 3 dimensional array by using a matrix for the 3rd dimension
                            
                                How to plot uploaded dataset using shiny?
                            
                                How to make graph color depend on two criteria in ggplot2?
                            
                                Set a header as the value of a variable in R markdown
                            
                                Adding scroll to sidebar in flexdashboard
                            
                                what is equivalent to do.call(rbind, list)?
                            
                                Rename variable names in dplyr based on vectors new_varname, old_varname [duplicate]
                            
                                See the specific color names from one existing palette in ggplot 2
                            
                                Given an element of a list, how do I recover its index inside the list?
                            
                                R:Inconsistent line thickness in geom_segment ggplot2

Donate For Us

If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow the site. Thank you!

Donate Us With

For each value determining if another column contains larger or smaller number

Tags:

dataframe

r

ajamess

People also ask

2 Answers

ZachTurn

989

Recent Activity

Donate For Us