I have:
Keyword Date Pos Bid
a 4/11/14 1 5.00
a 4/13/14 1 5.00
a 4/14/14 1 5.00
b 6/2/14 3 9.00
b 7/2/14 4 9.00
b 8/2/14 4 9.00
c 8/29/14 2 3.00
c 8/30/14 2 3.00
c 8/31/14 2 3.00
I need to subset so that only the row with the latest date stays:
Keyword Date Pos Bid
a 4/14/14 1 5.00
b 8/2/14 4 9.00
c 8/31/14 2 3.00
I tried:
# Attempt 1: use ddply to build one combined result per Keyword group.
# NOTE(review): both which() calls compare the raw x$Date column against a
# parsed Date value; if x$Date holds m/d/y text (as in the sample above),
# confirm whether that comparison can ever be TRUE.
# NOTE(review): c() returns a single vector, so Date, Pos and Bid presumably
# cannot each keep their own type in the result -- verify.
Latest = ddply( df,
'Keyword',
function(x) c (
Date = max(as.Date(x$Date, '%m/%d/%y')),
Pos = x$Pos[which(x$Date == max(as.Date(x$Date, '%m/%d/%y')))],
Bid = x$Bid[which(x$Date == max(as.Date(x$Date, '%m/%d/%y')))]
)
)
and
# Attempt 2: subset() with a column selection.
# NOTE(review): `Date = ...` is written with a single `=`, so it is passed as
# a named argument rather than as a logical row condition (`==`).
# NOTE(review): the selected column names differ from the sample columns shown
# above (Keyword, Date, Pos, Bid) -- presumably the real data's names; confirm.
Latest = subset( x,
Date = max(as.Date(Date, '%m/%d/%y')),
select = c('Identity', 'Date', 'Round.Avg.Pos.', 'Search.Bid')
)
But these either give me an error or not what I want. What am I missing?
Thanks.
To get the year from a date in R, you can use the functions as.POSIXct() and format(). For example, here's how to extract the year from a date: 1) date <- as.POSIXct("02/03/2014 10:41:00", format = "%m/%d/%Y %H:%M:%S"), and 2) format(date, format = "%Y").
You could try
library(dplyr)
library(tidyr)
# Latest row per Keyword: parse the m/d/y text into Date, sort newest first,
# then keep the first row of each Keyword group.
df %>%
  mutate(Date = as.Date(Date, format = "%m/%d/%y")) %>%
  group_by(Keyword) %>%
  arrange(desc(Date)) %>%
  slice(1) %>%
  # Drop the grouping so later verbs on the result are not silently per-Keyword.
  ungroup()
# Keyword Date Pos Bid
#1 a 2014-04-14 1 5
#2 b 2014-08-02 4 9
#3 c 2014-08-31 2 3
Or
# Latest row(s) per Keyword: parse the m/d/y text into Date once (element-wise,
# so it does not need the grouping), then keep every row whose date equals its
# group's maximum. Rows tied for the latest date are all retained.
df %>%
  mutate(Date = as.Date(Date, format = "%m/%d/%y")) %>%
  group_by(Keyword) %>%
  filter(Date == max(Date)) %>%
  # Drop the grouping so later verbs on the result are not silently per-Keyword.
  ungroup()
Or using base R
# Parse the m/d/y text once and reuse it for both the per-group maximum and
# the row filter (the Date column in df itself is left untouched).
parsed <- as.Date(df$Date, format = "%m/%d/%y")
# ave() broadcasts each Keyword's latest date back onto every row of its group.
indx <- ave(parsed, df$Keyword, FUN = max)
# which() discards NA comparisons (e.g. unparseable dates) instead of letting
# them turn into all-NA rows in the subset.
df[which(parsed == indx), ]
# Keyword Date Pos Bid
#3 a 4/14/14 1 5
#6 b 8/2/14 4 9
#9 c 8/31/14 2 3
Or using ddply
# For each Keyword group, parse the m/d/y text and return the row(s) whose
# parsed date equals the group's maximum; ddply stacks the per-group results.
ddply(df, .(Keyword), function(grp) {
  parsed <- as.Date(grp$Date, "%m/%d/%y")
  is_latest <- parsed == max(parsed)
  grp[is_latest, ]
})
# Keyword Date Pos Bid
#1 a 4/14/14 1 5
#2 b 8/2/14 4 9
#3 c 8/31/14 2 3
# Sample data used by the solutions: nine rows, three per Keyword, with Date
# kept as m/d/y text, Pos as integer and Bid as double.
df <- data.frame(
  Keyword = rep(c("a", "b", "c"), each = 3L),
  Date = c(
    "4/11/14", "4/13/14", "4/14/14",
    "6/2/14", "7/2/14", "8/2/14",
    "8/29/14", "8/30/14", "8/31/14"
  ),
  Pos = c(1L, 1L, 1L, 3L, 4L, 4L, 2L, 2L, 2L),
  Bid = rep(c(5, 9, 3), each = 3L),
  stringsAsFactors = FALSE
)
Or using data.table
library(data.table)
# Convert df to a data.table in place, then for each Keyword keep the row of
# .SD whose parsed date is the maximum (which.max picks the first on ties).
setDT(df)
df[
  ,
  .SD[which.max(as.Date(Date, format = "%m/%d/%y"))],
  by = Keyword
]
# Keyword Date Pos Bid
# 1: a 4/14/14 1 5
# 2: b 8/2/14 4 9
# 3: c 8/31/14 2 3
Here's an additional base R solution using the "split-apply-combine" methodology
# Split df into one data frame per Keyword, take the row with the latest
# parsed date from each piece, and stack the pieces back together.
do.call(
  rbind,
  lapply(
    split(df, df$Keyword),
    function(grp) {
      newest <- which.max(as.Date(grp$Date, format = "%m/%d/%y"))
      grp[newest, ]
    }
  )
)
# Keyword Date Pos Bid
# a a 4/14/14 1 5
# b b 8/2/14 4 9
# c c 8/31/14 2 3
Note: Your desired output leaves the Date
column in its original format, so I apply as.Date
within every group in both solutions. The best practice, however, is to convert the column to Date
class once and then use the already-converted column in the aggregation process.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With