say I have two sets of shape files that cover the same region and often, but not always share borders, e.g. US counties and PUMAs. I'd like to define a new scale of polygon that uses both PUMAs and counties as atomic building blocks, i.e. neither can ever be split, but I'd still like as many units as possible. Here is a toy example:
library(sp)
# make fake data
# 1) counties:
Cty <- SpatialPolygons(list(
Polygons(list(Polygon(cbind(x=c(0,2,2,1,0,0),y=c(0,0,2,2,1,0)), hole=FALSE)),"county1"),
Polygons(list(Polygon(cbind(x=c(2,4,4,3,3,2,2),y=c(0,0,2,2,1,1,0)),hole=FALSE)),"county2"),
Polygons(list(Polygon(cbind(x=c(4,5,5,4,4),y=c(0,0,3,2,0)),hole=FALSE)),"county3"),
Polygons(list(Polygon(cbind(x=c(0,1,2,2,0,0),y=c(1,2,2,3,3,1)),hole=FALSE)),"county4"),
Polygons(list(Polygon(cbind(x=c(2,3,3,4,4,3,3,2,2),y=c(1,1,2,2,3,3,4,4,1)),hole=FALSE)),"county5"),
Polygons(list(Polygon(cbind(x=c(0,2,2,1,0,0),y=c(3,3,4,5,5,3)),hole=FALSE)),"county6"),
Polygons(list(Polygon(cbind(x=c(1,2,3,4,1),y=c(5,4,4,5,5)),hole=FALSE)),"county7"),
Polygons(list(Polygon(cbind(x=c(3,4,4,5,5,4,3,3),y=c(3,3,2,3,5,5,4,3)),hole=FALSE)),"county8")
))
counties <- SpatialPolygonsDataFrame(Cty, data = data.frame(ID=paste0("county",1:8),
row.names=paste0("county",1:8),
stringsAsFactors=FALSE)
)
# 2) PUMAs:
Pum <- SpatialPolygons(list(
Polygons(list(Polygon(cbind(x=c(0,4,4,3,3,2,2,1,0,0),y=c(0,0,2,2,1,1,2,2,1,0)), hole=FALSE)),"puma1"),
Polygons(list(Polygon(cbind(x=c(4,5,5,4,3,3,4,4),y=c(0,0,5,5,4,3,3,0)),hole=FALSE)),"puma2"),
Polygons(list(Polygon(cbind(x=c(0,1,2,2,3,3,2,0,0),y=c(1,2,2,1,1,2,3,3,1)),hole=FALSE)),"puma3"),
Polygons(list(Polygon(cbind(x=c(2,3,4,4,3,3,2,2),y=c(3,2,2,3,3,4,4,3)),hole=FALSE)),"puma4"),
Polygons(list(Polygon(cbind(x=c(0,1,1,3,4,0,0),y=c(3,3,4,4,5,5,3)),hole=FALSE)),"puma5"),
Polygons(list(Polygon(cbind(x=c(1,2,2,1,1),y=c(3,3,4,4,3)),hole=FALSE)),"puma6")
))
Pumas <- SpatialPolygonsDataFrame(Pum, data = data.frame(ID=paste0("puma",1:6),
row.names=paste0("puma",1:6),
stringsAsFactors=FALSE)
)
# desired result:
Cclust <- SpatialPolygons(list(
Polygons(list(Polygon(cbind(x=c(0,4,4,3,3,2,2,1,0,0),y=c(0,0,2,2,1,1,2,2,1,0)), hole=FALSE)),"ctyclust1"),
Polygons(list(Polygon(cbind(x=c(4,5,5,4,3,3,4,4),y=c(0,0,5,5,4,3,3,0)),hole=FALSE)),"ctyclust2"),
Polygons(list(Polygon(cbind(x=c(0,1,2,2,3,3,4,4,3,3,2,2,0,0),y=c(1,2,2,1,1,2,2,3,3,4,4,3,3,1)),hole=FALSE)),"ctyclust3"),
Polygons(list(Polygon(cbind(x=c(0,2,2,3,4,0,0),y=c(3,3,4,4,5,5,3)),hole=FALSE)),"ctyclust4")
))
CtyClusters <- SpatialPolygonsDataFrame(Cclust, data = data.frame(ID = paste0("ctyclust", 1:4),
row.names = paste0("ctyclust", 1:4),
stringsAsFactors=FALSE)
)
# take a look
par(mfrow = c(1, 2))
plot(counties, border = gray(.3), lwd = 4)
plot(Pumas, add = TRUE, border = "#EEBB00", lty = 2, lwd = 2)
legend(-.5, -.3, lty = c(1, 2), lwd = c(4, 2), col = c(gray(.3), "#EEBB00"),
legend = c("county line", "puma line"), xpd = TRUE, bty = "n")
text(coordinates(counties), counties@data$ID,col = gray(.3))
text(coordinates(Pumas), Pumas@data$ID, col = "#EEBB00",cex=1.5)
title("building blocks")
#desired result:
plot(CtyClusters)
title("desired result")
text(-.5, -.5, "maximum units possible,\nwithout breaking either PUMAs or counties",
xpd = TRUE, pos = 4)
I've naively tried many of the g* functions in the rgeos package to achieve this and have not made headway. Does anyone know of a nice function or awesome trick for this task? Thank you!
[I'm also open to suggestions on a better title]
Here's a relatively succinct solution which:
Uses rgeos::gRelate()
to identify Pumas that overlap but don't completely encompass/cover each county.To understand what it does, run example(gRelate)
and see this Wikipedia page. (h.t. to Tim Riffe)
Uses RBGL::connectedComp()
to identify groups of Pumas that should thus be merged. (For pointers on installing the RBGL package, see my answer to this SO question.)
Uses rgeos::gUnionCascaded()
to merge the indicated Pumas.
library(rgeos)
library(RBGL)
## Identify groups of Pumas that should be merged
x <- gRelate(counties, Pumas, byid=TRUE)
x <- matrix(grepl("2.2......", x), ncol=ncol(x), dimnames=dimnames(x))
k <- x %*% t(x)
l <- connectedComp(as(k, "graphNEL"))
## Extend gUnionCascaded so that each SpatialPolygon gets its own ID.
gMerge <- function(ii) {
x <- gUnionCascaded(Pumas[ii,])
spChFIDs(x, paste(ii, collapse="_"))
}
## Merge Pumas as needed
res <- do.call(rbind, sapply(l, gMerge))
plot(res)
I think you could do this with a smart set of tests for containment. This gets two of your parts, the simple paired case where puma1
contains county1
and county2
, and puma2
contains county8
and county3
.
library(rgeos)
## pumas by counties
pbyc <- gContains(Pumas, counties, byid = TRUE)
## row / col pairs of where contains test is TRUE for Pumas
pbyc.pairs <- cbind(row(pbyc)[pbyc], col(pbyc)[pbyc])
par(mfrow = c(nrow(pbyc.pairs), 1))
for (i in 1:nrow(pbyc.pairs)) {
plot(counties, col = "white")
plot(gUnion(counties[pbyc.pairs[i,1], ], Pumas[pbyc.pairs[i,2], ]), col = "red", add = TRUE)
}
The plotting there is dumbly redundant, but I think it shows a start. You need to find which contains tests accumulate the most smaller parts, and then union those. Sorry I haven't put the effort in to finish but I think this would work.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With