I have a csv file (crop_calendar.csv
) containing information on development stages of crop in a particular region. Basically each row has the following structure:
crop_name sowing_dat emergence_date flowering_date maturity_date harvest_date
which gives for example:
Winter_wheat 18.08 28.08 24.06 30.07 3.08
Winter_rye 18.08 28.08 15.06 23.07 29.07
Spring_wheat 27.04 10.05 1.07 4.08 7.08
Spring_barley 27.04 12.05 27.06 1.08 5.08
Now, I'd like to put that information in a graphic that looks like that:
Any idea how to do it with lots of crop (rows) and at different locations?
Here is an example assuming you have the day.of.year() of sowing and the duration (in days) of the three periods for each crop and each country.
#making random numbers reproducible
set.seed(12345)
rawdata <- expand.grid(
Crop = paste("Crop", LETTERS[1:8]),
Country = paste("Country", letters[10:13])
)
#day.of.year of sowing
rawdata$Sowing <- runif(nrow(rawdata), min = 0, max = 365)
#number of days until mid season
rawdata$Midseason <- runif(nrow(rawdata), min = 10, max = 30)
#number of days until harvest
rawdata$Harvest <- runif(nrow(rawdata), min = 20, max = 150)
#number of days until end of harvest
rawdata$Harvest.end <- runif(nrow(rawdata), min = 10, max = 40)
dataset <- data.frame(Crop = character(0), Country = character(0), Period = character(0), Duration = numeric(0))
#sowing around new year
last.day <- rowSums(rawdata[, c("Sowing", "Midseason")])
if(any(last.day >= 365)){
dataset <- rbind(
dataset,
cbind(
rawdata[last.day >= 365, c("Crop", "Country")],
Period = "Sowing",
Duration = last.day[last.day >= 365] - 365
)
)
dataset <- rbind(
dataset,
cbind(
rawdata[last.day >= 365, c("Crop", "Country")],
Period = "Mid-season",
Duration = rawdata$Harvest[last.day >= 365]
)
)
dataset <- rbind(
dataset,
cbind(
rawdata[last.day >= 365, c("Crop", "Country")],
Period = "Harvest",
Duration = rawdata$Harvest.end[last.day >= 365]
)
)
dataset <- rbind(
dataset,
cbind(
rawdata[last.day >= 365, c("Crop", "Country")],
Period = NA,
Duration = 365 - rowSums(rawdata[last.day >= 365, c("Midseason", "Harvest", "Harvest.end")])
)
)
dataset <- rbind(
dataset,
cbind(
rawdata[last.day >= 365, c("Crop", "Country")],
Period = "Sowing",
Duration = 365 - rawdata$Sowing[last.day >= 365]
)
)
rawdata <- rawdata[last.day < 365, ]
}
#mid-season around new year
last.day <- rowSums(rawdata[, c("Sowing", "Midseason", "Harvest")])
if(any(last.day >= 365)){
dataset <- rbind(
dataset,
cbind(
rawdata[last.day >= 365, c("Crop", "Country")],
Period = "Mid-season",
Duration = last.day[last.day >= 365] - 365
)
)
dataset <- rbind(
dataset,
cbind(
rawdata[last.day >= 365, c("Crop", "Country")],
Period = "Harvest",
Duration = rawdata$Harvest.end[last.day >= 365]
)
)
dataset <- rbind(
dataset,
cbind(
rawdata[last.day >= 365, c("Crop", "Country")],
Period = NA,
Duration = 365 - rowSums(rawdata[last.day >= 365, c("Midseason", "Harvest", "Harvest.end")])
)
)
dataset <- rbind(
dataset,
cbind(
rawdata[last.day >= 365, c("Crop", "Country")],
Period = "Sowing",
Duration = rawdata$Midseason[last.day >= 365]
)
)
dataset <- rbind(
dataset,
cbind(
rawdata[last.day >= 365, c("Crop", "Country")],
Period = "Mid-season",
Duration = 365 - rowSums(rawdata[last.day >= 365, c("Sowing", "Midseason")])
)
)
rawdata <- rawdata[last.day < 365, ]
}
#harvest around new year
last.day <- rowSums(rawdata[, c("Sowing", "Midseason", "Harvest", "Harvest.end")])
if(any(last.day >= 365)){
dataset <- rbind(
dataset,
cbind(
rawdata[last.day >= 365, c("Crop", "Country")],
Period = "Harvest",
Duration = last.day[last.day >= 365] - 365
)
)
dataset <- rbind(
dataset,
cbind(
rawdata[last.day >= 365, c("Crop", "Country")],
Period = NA,
Duration = 365 - rowSums(rawdata[last.day >= 365, c("Midseason", "Harvest", "Harvest.end")])
)
)
dataset <- rbind(
dataset,
cbind(
rawdata[last.day >= 365, c("Crop", "Country")],
Period = "Sowing",
Duration = rawdata$Midseason[last.day >= 365]
)
)
dataset <- rbind(
dataset,
cbind(
rawdata[last.day >= 365, c("Crop", "Country")],
Period = "Mid-season",
Duration = rawdata$Harvest[last.day >= 365]
)
)
dataset <- rbind(
dataset,
cbind(
rawdata[last.day >= 365, c("Crop", "Country")],
Period = "Harvest",
Duration = 365 - rowSums(rawdata[last.day >= 365, c("Sowing", "Midseason", "Harvest")])
)
)
rawdata <- rawdata[last.day < 365, ]
}
#no crop around new year
dataset <- rbind(
dataset,
cbind(
rawdata[, c("Crop", "Country")],
Period = NA,
Duration = rawdata$Sowing
)
)
dataset <- rbind(
dataset,
cbind(
rawdata[, c("Crop", "Country")],
Period = "Sowing",
Duration = rawdata$Midseason
)
)
dataset <- rbind(
dataset,
cbind(
rawdata[, c("Crop", "Country")],
Period = "Mid-season",
Duration = rawdata$Harvest
)
)
dataset <- rbind(
dataset,
cbind(
rawdata[, c("Crop", "Country")],
Period = "Harvest",
Duration = rawdata$Harvest.end
)
)
dataset <- rbind(
dataset,
cbind(
rawdata[, c("Crop", "Country")],
Period = NA,
Duration = 365 - rowSums(rawdata[, c("Sowing", "Midseason", "Harvest")])
)
)
Labels <- c("", "Jan.", "Feb.", "Mar.", "Apr.", "May", "Jun.", "Jul.", "Aug.", "Sep.", "Okt.", "Nov.", "Dec.")
Breaks <- cumsum(c(0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31))
ggplot(dataset, aes(x = Crop, y = Duration, colour = Period, fill = Period)) + geom_bar(stat = "identity") + facet_wrap(~Country) + coord_flip() + scale_fill_manual(values = c("Sowing" = "darkgreen", "Mid-season" = "grey", "Harvest" = "yellow")) + scale_colour_manual(values = c("Sowing" = "black", "Mid-season" = "black", "Harvest" = "black"), guide = "none") + scale_y_continuous("", breaks = Breaks, labels = Labels, limits = c(0, 365)) + theme_bw() + theme(axis.text.x = element_text(hjust = 1))
To add legend place "color=.."
inside the aes()
call in each geom_linerange()
and then add scale_color_identity()
with argument guide="legend"
- this will use color names as actual colors. With labels=
you can change labels in legend. To remove lines between months add minor_breaks=NULL
inside the scale_y_date()
.
ggplot(inDf, aes(x=crop)) +
geom_linerange(aes(ymin=sowing, ymax=emergence, color="green"), size=5) +
geom_linerange(aes(ymin=emergence, ymax=flowering, color="green3"), size=5) +
geom_linerange(aes(ymin=flowering, ymax=maturity, color="yellow"), size=5) +
geom_linerange(aes(ymin=maturity, ymax=harvesting, color="red"), size=5) +
coord_flip() +
scale_y_date(lim = c(as.Date("2012-08-15"), as.Date("2013-09-01")),
breaks=date_breaks(width = "1 month"), labels = date_format("%b"),
minor_breaks=NULL)+
ggtitle('Crop Calendar')+ xlab("")+ylab("")+
scale_color_identity("",guide="legend",
labels=c("emergence","flowering","maturity","harvesting"))
It is a bit difficult to guess what you want to do. With only 3 dates you cannot reproduce the graph you show (requires 4 dates for each crop). It is also not clear what the numbers represent (presumably weeks?). If it is just a question about plotting, this will get you started. Otherwise, please clarify the question.
df <- read.table(text="crop_name emergence_date maturity_date harvest_date
wheat 13.04 25.05 30.06
corn 12.02 21.30 23.11", header=TRUE)
require(ggplot2)
ggplot(df, aes(x=crop_name)) +
geom_linerange(aes(ymin=emergence_date, ymax=maturity_date), color="green3", size=5) +
geom_linerange(aes(ymin=maturity_date, ymax=harvest_date), color="yellow", size=5) +
coord_flip() + ylim(0, 52)
Ok so compiling answers and with additional research, here is the solution I ended up with:
inDf <- read.table(text="crop sowing emergence flowering maturity harvesting
Spring barley 27/04/2013 12/05/2013 27/06/2013 1/08/2013 5/08/2013
Oats 27/04/2013 10/05/2013 29/06/2013 6/08/2013 8/08/2013
Maize 25/05/2013 6/06/2013 18/08/2013 10/09/2013 12/09/2013", header=TRUE)
inDf[, "sowing"] <- as.Date(inDf[, "sowing"], format = '%d/%m/%Y')
inDf[, "emergence"] <- as.Date(inDf[, "emergence"], format = '%d/%m/%Y')
inDf[, "flowering"] <- as.Date(inDf[, "flowering"], format = '%d/%m/%Y')
inDf[, "maturity"] <- as.Date(inDf[, "maturity"], format = '%d/%m/%Y')
inDf[, "harvesting"] <- as.Date(inDf[, "harvesting"], format = '%d/%m/%Y')
ggplot(inDf, aes(x=crop)) +
geom_linerange(aes(ymin=sowing, ymax=emergence), color="green", size=5) +
geom_linerange(aes(ymin=emergence, ymax=flowering), color="green3", size=5) +
geom_linerange(aes(ymin=flowering, ymax=maturity), color="yellow", size=5) +
geom_linerange(aes(ymin=maturity, ymax=harvesting), color="red", size=5) +
coord_flip() + scale_y_date(lim = c(as.Date("2012-08-15"), as.Date("2013-09-01")),breaks=date_breaks(width = "1 month"), labels = date_format("%b"))+
ggtitle('Crop Calendar')+ xlab("")+ylab("")
which gives:
BUT
I'd like now to add the legend and to remove all the white lines between each month. Any ideas? Thanks
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With