I have ~400K documents in a mongo collection, all with geometry of type:Polygon
. It is not possible to add a 2dsphere
index to the data as it currently stands because the geometry apparently has self-intersections.
In the past we had a hacky workaround which was to compute the bounding box of the geometry on a mongoose save hook and then index that rather than the geometry itself, but we would like to simplify things and just use the actual geometry.
So far I have tried using turf as follows (this is the body of a function called fix
):
let geom = turf.polygon(geometry.coordinates);
geom = turf.simplify(geom, { tolerance: 1e-7 });
geom = turf.cleanCoords(geom);
geom = turf.unkinkPolygon(geom);
geom = turf.combine(geom);
return geom.features[0].geometry;
The most important function there is the unkinkPolygons
which I hoped would do exactly what I wanted, i.e. make the geometry nice enough to be indexed. The simplify
is possibly not helpful but I added it in for good measure. The clean
is there because unkink
complained about its input, and the combine
is there to turn an array of Polygon
s into a single MultiPolygon
. Actually, unkink
still wasn't happy with it's inputs, so I had to write a hacky function as follows that jitters duplicated vertices, this modifies the geom
before passing to unkink
:
function jitterDups(geom) {
let coords = geom.geometry.coordinates;
let points = new Set();
for (let ii = 0; ii < coords.length; ii++) {
// last coords is allowed to match first, not sure if it must match.
let endsMatch = coords[ii][0].join(",") === coords[ii][coords[ii].length - 1].join(",");
for (let jj = 0; jj < coords[ii].length - (endsMatch ? 1 : 0); jj++) {
let str = coords[ii][jj].join(",");
while (points.has(str)) {
coords[ii][jj][0] += 1e-8; // if you make this too small it doesn't do the job
if (jj === 0 && endsMatch) {
coords[ii][coords[ii].length - 1][0] = coords[ii][jj][0];
}
str = coords[ii][jj].join(",");
}
points.add(str);
}
}
}
However, even after all of that mongo still complains.
Here is some sample raw Polygon
input:
{ type: "Polygon", coordinates: [ [ [ -0.027542009179339, 51.5122867222457 ], [ -0.027535822940572, 51.512281465421 ], [ -0.027535925691804, 51.5122814221859 ], [ -0.027589474043984, 51.5122605515771 ], [ -0.027638484531731, 51.5122996934574 ], [ -0.027682911101528, 51.5123351881505 ], [ -0.027689915350493, 51.5123872384419 ], [ -0.027672409315982, 51.5123868001613 ], [ -0.027667905522642, 51.5123866344944 ], [ -0.027663068941865, 51.5123864992013 ], [ -0.02764931654289, 51.512375566682 ], [ -0.027552504539425, 51.5122983194123 ], [ -0.027542009179339, 51.5122867222457 ] ], [ [ -0.027542009179339, 51.5122867222457 ], [ -0.027557948301911, 51.5122984109658 ], [ -0.027560309178214, 51.5123001412876 ], [ -0.027542009179339, 51.5122867222457 ] ] ] }
And that same data after it has passed through the above fixing pipeline:
{ type: "MultiPolygon", coordinates: [ [ [ [ -0.027560309178214, 51.5123001412876 ], [ -0.02754202882236209, 51.51228674396312 ], [ -0.027542009179339, 51.5122867222457 ], [ -0.027535822940572, 51.512281465421 ], [ -0.027589474043984, 51.5122605515771 ], [ -0.027682911101528, 51.5123351881505 ], [ -0.027689915350493, 51.5123872384419 ], [ -0.027663068941865, 51.5123864992013 ], [ -0.027552504539425, 51.5122983194123 ], [ -0.02754202884162257, 51.51228674398443 ], [ -0.027557948301911, 51.5122984109658 ], [ -0.027560309178214, 51.5123001412876 ] ] ], [ [ [ -0.02754202884162257, 51.51228674398443 ], [ -0.02754202882236209, 51.51228674396312 ], [ -0.027541999179339, 51.5122867222457 ], [ -0.02754202884162257, 51.51228674398443 ] ] ] ] }
And here is the relevant bit of the error that is spat out by the index creation:
Edges 0 and 9 cross.
Edge locations in degrees: [-0.0275603, 51.5123001]-[-0.0275420, 51.5122867] and [-0.0275420, 51.5122867]-[-0.0275579, 51.5122984]
"code" : 16755,
"codeName" : "Location16755"
My question is: is there a bug in turf
, or is it not doing what I need here in terms of keeping mongo happy? Also is there any documentation on exactly what the 2dshpere
index needs in terms of "fixing"? Also, does anyone have suggestions as to what other tools I might use to fix the data, e.g. mapshaper or PostGIS's ST_MakeValid.
Note that once the existing data is fixed I also need a solution for fixing new data on the fly (ideally something that works nice with node).
Mongo Version: 3.4.14 (or any later 3.x)
A 2dsphere index supports queries that calculate geometries on an earth-like sphere. 2dsphere index supports all MongoDB geospatial queries: queries for inclusion, intersection and proximity. For more information on geospatial queries, see Geospatial Queries.
MongoDB geospatial queries can interpret geometry on a flat surface or a sphere. 2dsphere indexes support only spherical queries (i.e. queries that interpret geometries on a spherical surface). 2d indexes support flat queries (i.e. queries that interpret geometries on a flat surface) and some spherical queries.
a 2-sphere is an ordinary 2-dimensional sphere in 3-dimensional Euclidean space, and is the boundary of an ordinary ball (3-ball). a 3-sphere is a 3-dimensional sphere in 4-dimensional Euclidean space.
The problem here is not that the polygon is intersecting itself, but rather that you have a (tiny) hole in the polygon, composed of 4 points, which shares a point with the exterior. So the hole "touches" the exterior, not intersects with it, but this is not allowed. You can fix such cases using Shapely buffer with a tiny value, e.g.:
shp = shapely.geometry.shape({ "type": "Polygon", "coordinates": [ [ [ -0.027542009179339, 51.5122867222457 ], [ -0.027535822940572, 51.512281465421 ], [ -0.027535925691804, 51.5122814221859 ], [ -0.027589474043984, 51.5122605515771 ], [ -0.027638484531731, 51.5122996934574 ], [ -0.027682911101528, 51.5123351881505 ], [ -0.027689915350493, 51.5123872384419 ], [ -0.027672409315982, 51.5123868001613 ], [ -0.027667905522642, 51.5123866344944 ], [ -0.027663068941865, 51.5123864992013 ], [ -0.02764931654289, 51.512375566682 ], [ -0.027552504539425, 51.5122983194123 ], [ -0.027542009179339, 51.5122867222457 ] ], [ [ -0.027542009179339, 51.5122867222457 ], [ -0.027557948301911, 51.5122984109658 ], [ -0.027560309178214, 51.5123001412876 ], [ -0.027542009179339, 51.5122867222457 ] ] ] })
shp = shp.buffer(1e-12, resolution=0)
geojson = shapely.geometry.mapping(shp)
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With