Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to add new calculated variables to a data frame

Tags:

loops

dataframe

r

I would like to create a loop that will add new variables to the data frame. Those variables should be the simple quadratic form of the existing variables. In the example below I would like to have 3 new vars that are: dat$birds_2 <- dat$birds^2; dat$wolfs_2 <- dat$wolfs^2; dat$snakes_2 <- dat$snakes^2. I would like to do this for multiple variables at once.

# Example data: 16 rows of three integer columns (birds, wolfs, snakes),
# parsed from an inline whitespace-separated table with a header row.
dat <- read.table(
  header = TRUE,
  text = " birds    wolfs     snakes
                    3        9         7
                    3        8         4
                    1        2         8
                    1        2         3
                    1        8         3
                    6        1         2
                    6        7         1
                    6        1         5
                    5        9         7
                    3        8         7
                    4        2         7
                    1        2         3
                    7        6         3
                    6        1         1
                    6        3         9
                    6        1         1   "
)

The needed output (dat_new) is (I show only the first 2 rows) :

 dat_new                      birds    wolfs     snakes birds_2    wolfs_2     snakes_2
                                3        9         7    9        81         49  
                                3        8         4    9        64         16
like image 565
mql4beginner Avatar asked Apr 30 '15 09:04

mql4beginner


2 Answers

In a one liner with setNames:

# Square every column, label the squares "<name>_2", then bind them after the
# original columns.
cbind(dat, setNames(dat^2, paste0(names(dat), "_2")))

#   birds wolfs snakes birds_2 wolfs_2 snakes_2
#1      3     9      7       9      81       49
#2      3     8      4       9      64       16
like image 177
Colonel Beauvel Avatar answered Sep 20 '22 20:09

Colonel Beauvel


An option using data.table

library(data.table)
# Convert dat to a data.table by reference, then add one squared column per
# existing column in a single `:=` call (no copy of dat is made).
setDT(dat)
dat[, paste0(names(dat), "_2") := lapply(.SD, function(col) col^2)]
head(dat, n = 2)
#   birds wolfs snakes birds_2 wolfs_2 snakes_2
#1:     3     9      7       9      81       49
#2:     3     8      4       9      64       16

Or you can use set (which would be more efficient) as there are multiple columns

# Square each column in place on a copy with data.table::set(), then bind the
# squared copy next to the original columns.
setDT(dat)
dat_new <- copy(dat)
# seq_len() yields an empty sequence for a zero-column table, whereas
# 1:ncol() would iterate over c(1, 0).
for (j in seq_len(ncol(dat_new))) {
  set(dat_new, i = NULL, j = j, value = dat_new[[j]]^2)
}
# Suffix the squared columns with "_2"; without this, cbind() would return
# duplicated column names instead of birds_2, wolfs_2, snakes_2.
setnames(dat_new, paste0(names(dat_new), "_2"))
cbind(dat, dat_new)

Benchmarks

# Benchmark data: 1e6 rows x 200 columns of values sampled with replacement
# from 0:20. The seed is fixed so the data are reproducible.
set.seed(24)
dat <- as.data.frame(matrix(sample(0:20, 1e6*200, replace=TRUE), 
       ncol=200))

# Separate copies for akrun1() and akrun2(), which call setDT() on dat1 and
# dat2 respectively, so that `dat` itself stays a plain data.frame for the
# other functions. copy() is presumably data.table::copy (data.table is
# loaded earlier on the page).
dat1 <- copy(dat)
dat2 <- copy(dat)


# Base R approach: reads the global `dat` and returns a data frame holding the
# original columns followed by their squares, the latter named "<name>_2".
Colonel <- function() {
  squared <- setNames(dat^2, paste0(names(dat), "_2"))
  cbind(dat, squared)
}
# data.table approach: converts the global `dat1` to a data.table by reference
# and adds one "<name>_2" column per existing column in a single `:=` call.
akrun1 <- function() {
  squared_names <- paste0(names(dat1), "_2")
  setDT(dat1)[, (squared_names) := lapply(.SD, "^", 2)]
}
# data.table::set() approach: squares every column of a copy of the global
# `dat2` in place, then returns the original columns bound to the squared
# ones, the latter named "<name>_2".
akrun2 <- function() {
  setDT(dat2)
  dat_new <- copy(dat2)
  # seq_len() yields an empty sequence for a zero-column table, whereas
  # 1:ncol() would iterate over c(1, 0).
  for (j in seq_len(ncol(dat_new))) {
    set(dat_new, i = NULL, j = j, value = dat_new[[j]]^2)
  }
  # Rename by reference so the result carries the same "_2" column names as
  # the other benchmarked functions instead of duplicated names.
  setnames(dat_new, paste0(names(dat_new), "_2"))
  cbind(dat2, dat_new)
}

# dplyr approach: reads the global `dat` and returns it with one additional
# squared column per original column, named "<name>_2".
# Uses across() in place of the deprecated mutate_each()/funs(); the `.names`
# glue spec keeps the originals and names the new columns in one step, so no
# separate rename or cbind() is needed.
jaap <- function() {
  dat %>%
    mutate(across(everything(), ~ .x^2, .names = "{.col}_2"))
}



 # apply() approach: reads the global `dat` and returns a data frame holding
 # the original columns followed by their squares, the latter named "<name>_2".
 cathG <- function() {
   ncol_ori <- ncol(dat)
   # apply() coerces dat to a matrix first; it is kept because this is the
   # approach being timed.
   datN <- cbind(dat, apply(dat, 2, "^", 2))
   colnames(datN)[ncol_ori + seq_len(ncol_ori)] <-
     paste(colnames(dat), 2, sep = "_")
   # Return the data frame explicitly: a function ending on a `colnames<-`
   # replacement returns the assigned names vector, not datN.
   datN
 }

# Time one call of each approach with system.time(). The commented figures
# under each call are the output recorded in the original post (seconds).
# NOTE(review): akrun1() adds columns to dat1 by reference, so re-running it
# would presumably operate on an already widened table; confirm before
# repeating the timings.
system.time(Colonel())
#   user  system elapsed 
#  5.589   1.472  46.843 

 system.time(akrun1())
 #   user  system elapsed 
 #  2.125   1.238  10.065 

system.time(akrun2())
#   user  system elapsed 
#  1.522   0.744   3.922 

system.time(jaap())
#   user  system elapsed 
#  1.597   0.926  11.153 

system.time(cathG())
#   user  system elapsed 
#  9.386   3.536  94.360 
like image 45
akrun Avatar answered Sep 20 '22 20:09

akrun