I know that in ggplot2
one can add the convex hull to a scatterplot by group as in
library(ggplot2)
library(plyr)
data(iris)
df<-iris
find_hull <- function(df) df[chull(df$Sepal.Length, df$Sepal.Width), ]
hulls <- ddply(df, "Species", find_hull)
plot <- ggplot(data = df, aes(x = Sepal.Length, y = Sepal.Width, colour=Species, fill = Species)) +
geom_point() +
geom_polygon(data = hulls, alpha = 0.5) +
labs(x = "Sepal.Length", y = "Sepal.Width")
plot
I was wondering though how one could calculate and add alpha bags instead, i.e. the largest convex hull that contains at least a proportion 1-alpha of all the points? Either in 2d (to display with ggplot2) or 3d (to display with rgl).
EDIT: My initial idea was be to keep on "peeling" the convex hull for as along as the criterion of containing at least a given % of points would be satisfied, although in the paper here it seems they use a different algorithm (isodepth, which seems to be implemented in R package depth, in function isodepth and aplpack::plothulls seems also close to what I want (although it produces a full plot as opposed to just the contour), so I think with these I may be sorted. Though these function only works in 2D, and I would also be interested in a 3D extension (to be plotted in rgl). If anyone has any pointers let me know!
EDIT2: with function depth::isodepth
I found a 2d solution (see post below), although I am still looking for a 3D solution as well - if anyone would happen to know how to do that, please let me know!
Ha with the help of function depth::isodepth
I came up with the following solution - here I find the alpha bag that contains a proportion of at least 1-alpha of all points :
library(mgcv)
library(depth)
library(plyr)
library(ggplot2)
data(iris)
df=iris[,c(1,2,5)]
alph=0.05
find_bag = function(x,alpha=alph) {
n=nrow(x)
target=1-alpha
propinside=1
d=1
while (propinside>target) {
p=isodepth(x[,1:2],dpth=d,output=T, mustdith=T)[[1]]
ninside=sum(in.out(p,as.matrix(x[,1:2],ncol=2))*1)
nonedge=sum(sapply(1:nrow(p),function (row)
nrow(merge(round(setNames(as.data.frame(p[row,,drop=F]),names(x)[1:2]),5),as.data.frame(x[,1:2])))>0)*1)-3
propinside=(ninside+nonedge)/n
d=d+1
}
p=isodepth(x[,1:2],dpth=d-1,output=T, mustdith=T)[[1]]
p }
bags <- ddply(df, "Species", find_bag,alpha=alph)
names(bags) <- c("Species",names(df)[1:2])
plot <- ggplot(data = df, aes(x = Sepal.Length, y = Sepal.Width, colour=Species, fill = Species)) +
geom_point() +
geom_polygon(data = bags, alpha = 0.5) +
labs(x = "Sepal.Length", y = "Sepal.Width")
plot
EDIT2: Using my original idea of convex hull peeling I also came up with the following solution which now works in 2d & 3d; the result is not quite the same is with the isodepth algorithm, but it's pretty close :
# in 2d
library(plyr)
library(ggplot2)
data(iris)
df=iris[,c(1,2,5)]
alph=0.05
find_bag = function(x,alpha=alph) {
n=nrow(x)
propinside=1
target=1-alpha
x2=x
while (propinside>target) {
propinside=nrow(x2)/n
hull=chull(x2)
x2old=x2
x2=x2[-hull,]
}
x2old[chull(x2old),] }
bags <- ddply(df, "Species", find_bag, alpha=alph)
plot <- ggplot(data = df, aes(x = Sepal.Length, y = Sepal.Width, colour=Species, fill = Species)) +
geom_point() +
geom_polygon(data = bags, alpha = 0.5) +
labs(x = "Sepal.Length", y = "Sepal.Width")
plot
# in 3d
library(plyr)
library(ggplot2)
data(iris)
df=iris[,c(1,2,3,5)]
levels=unique(df[,"Species"])
nlevels=length(levels)
zoom=0.8
cex=1
aspectr=c(1,1,0.7)
pointsalpha=1
userMatrix=matrix(c(0.80,-0.60,0.022,0,0.23,0.34,0.91,0,-0.55,-0.72,0.41,0,0,0,0,1),ncol=4,byrow=T)
windowRect=c(0,29,1920,1032)
cols=c("red","forestgreen","blue")
alph=0.05
plotbag = function(x,alpha=alph,grp=1,cols=c("red","forestgreen","blue"),transp=0.2) {
propinside=1
target=1-alpha
x2=x
levels=unique(x2[,ncol(x2)])
x2=x2[x2[,ncol(x2)]==levels[[grp]],]
n=nrow(x2)
while (propinside>target) {
propinside=nrow(x2)/n
hull=unique(as.vector(convhulln(as.matrix(x2[,1:3]), options = "Tv")))
x2old=x2
x2=x2[-hull,]
}
ids=t(convhulln(as.matrix(x2old[,1:3]), options = "Tv"))
rgl.triangles(x2old[ids,1],x2old[ids,2],x2old[ids,3],col=cols[[grp]],alpha=transp,shininess=50)
}
open3d(zoom=zoom,userMatrix=userMatrix,windowRect=windowRect,antialias=8)
for (i in 1:nlevels) {
plot3d(x=df[df[,ncol(df)]==levels[[i]],][,1],
y=df[df[,ncol(df)]==levels[[i]],][,2],
z=df[df[,ncol(df)]==levels[[i]],][,3],
type="s",
col=cols[[i]],
size=cex,
lit=TRUE,
alpha=pointsalpha,point_antialias=TRUE,
line_antialias=TRUE,shininess=50, add=TRUE)
plotbag(df,alpha=alph, grp=i, cols=c("red","forestgreen","blue"), transp=0.3) }
axes3d(color="black",drawfront=T,box=T,alpha=1)
title3d(color="black",xlab=names(df)[[1]],ylab=names(df)[[2]],zlab=names(df)[[3]],alpha=1)
aspect3d(aspectr)
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With