Here is my first GeoDataFrame:
!pip install geopandas
import pandas as pd
import geopandas
# Sample input: two lists of city records (name, country, coordinates in degrees).
# Each city in city1 is later matched against the cities in city2.
# NOTE(review): Brasilia's longitude below (-70.66) is identical to Santiago's,
# while the printed City1 table shows -47.91 -- confirm which value is intended.
# NOTE(review): "Chile " and "Colombia " carry a trailing space -- presumably a typo.
city1 = [{'City':"Buenos Aires","Country":"Argentina","Latitude":-34.58,"Longitude":-58.66},
{'City':"Brasilia","Country":"Brazil","Latitude":-15.78 ,"Longitude":-70.66},
{'City':"Santiago","Country":"Chile ","Latitude":-33.45 ,"Longitude":-70.66 }]
city2 = [{'City':"Bogota","Country":"Colombia ","Latitude":4.60 ,"Longitude":-74.08},
{'City':"Caracas","Country":"Venezuela","Latitude":10.48 ,"Longitude":-66.86}]
city1df = pd.DataFrame(city1)
city2df = pd.DataFrame(city2)
# Build one point per row; points_from_xy is given x = Longitude, y = Latitude.
gcity1df = geopandas.GeoDataFrame(
city1df, geometry=geopandas.points_from_xy(city1df.Longitude, city1df.Latitude))
gcity2df = geopandas.GeoDataFrame(
city2df, geometry=geopandas.points_from_xy(city2df.Longitude, city2df.Latitude))
City1
City Country Latitude Longitude geometry
0 Buenos Aires Argentina -34.58 -58.66 POINT (-58.66000 -34.58000)
1 Brasilia Brazil -15.78 -47.91 POINT (-47.91000 -15.78000)
2 Santiago Chile -33.45 -70.66 POINT (-70.66000 -33.45000)
And here is my second GeoDataFrame, City2:
City Country Latitude Longitude geometry
1 Bogota Colombia 4.60 -74.08 POINT (-74.08000 4.60000)
2 Caracas Venezuela 10.48 -66.86 POINT (-66.86000 10.48000)
I would like a third DataFrame that gives, for each city in city1, the nearest city in city2 together with the distance, like this:
City Country Latitude Longitude geometry Nearest Distance
0 Buenos Aires Argentina -34.58 -58.66 POINT (-58.66000 -34.58000) Bogota 111 Km
Here is my current solution using GeoDjango and a dict (but it is far too slow):
from django.contrib.gis.geos import GEOSGeometry

# result[i] holds the smallest distance found for city1[i];
# dict_result maps each city1 name to the name of its nearest city2 entry.
result = []
dict_result = {}
for city01 in city1:
    # WKT points are written "x y", i.e. longitude first, then latitude.
    pnt = GEOSGeometry(
        f"SRID=4326;POINT({city01['Longitude']} {city01['Latitude']})"
    )
    dist = 99999999  # sentinel: larger than any distance computed below
    nearest_name = None  # stays None when city2 is empty
    for city02 in city2:
        pnt2 = GEOSGeometry(
            f"SRID=4326;POINT({city02['Longitude']} {city02['Latitude']})"
        )
        # NOTE(review): distance() on SRID 4326 geometries is presumably
        # expressed in degrees, so "* 100" is only a rough degrees-to-km
        # factor -- confirm before relying on the absolute values.
        distance_test = pnt.distance(pnt2) * 100
        if distance_test < dist:
            dist = distance_test
            # Remember the city that produced the minimum, not merely the
            # last city visited by the inner loop.
            nearest_name = city02['City']
    result.append(dist)
    dict_result[city01['City']] = nearest_name
Here is what I have tried:
from shapely.ops import nearest_points
# Union of all gcity2df geometries; used below as the search target
# handed to nearest_points.
pts3 = gcity2df.geometry.unary_union
def Euclidean_Dist(df1, df2, cols=('x_coord', 'y_coord')):
    """Return the row-wise Euclidean distance between two data frames.

    Args:
        df1: Frame holding the first set of coordinates.
        df2: Frame holding the second set; rows are paired with ``df1``
            by position.
        cols: Names of the coordinate columns to compare. Any iterable of
            column names is accepted.

    Returns:
        A 1-D NumPy array with one distance per row.
    """
    # Local import: the surrounding snippet never imports numpy itself.
    import numpy as np

    # Normalise to a list: indexing a frame with a tuple would be read as a
    # single (MultiIndex-style) key rather than a selection of columns.
    columns = list(cols)
    delta = df1[columns].values - df2[columns].values
    return np.linalg.norm(delta, axis=1)
def near(point, pts=pts3):
    """Return the ``City`` value of the gcity2df row closest to *point*.

    Args:
        point: Geometry to search from.
        pts: Geometry to search in; defaults to the union of all gcity2df
            geometries (bound once, when this function is defined).

    Returns:
        The city name as a scalar, so the result can be assigned directly
        to a single DataFrame column.

    Raises:
        IndexError: If no gcity2df geometry equals the nearest point found.
    """
    # nearest_points returns one point per input geometry; index 1 is the
    # point that lies on ``pts``.
    target = nearest_points(point, pts)[1]
    is_match = gcity2df.geometry == target
    # Take the first match as a scalar. Returning the filtered Series makes
    # DataFrame.apply build a frame with one column per matched row label,
    # which cannot be assigned to a single column once matches differ.
    return gcity2df.loc[is_match, 'City'].iloc[0]
# For every row of gcity1df, store the result of near() for that row's geometry.
gcity1df['Nearest'] = gcity1df.apply(lambda row: near(row.geometry), axis=1)
# Bare expression; presumably there to display the frame in a notebook cell.
gcity1df
Here is the result:
City Country Latitude Longitude geometry Nearest
0 Buenos Aires Argentina -34.58 -58.66 POINT (-58.66000 -34.58000) Bogota
1 Brasilia Brazil -15.78 -70.66 POINT (-70.66000 -15.78000) Bogota
2 Santiago Chile -33.45 -70.66 POINT (-70.66000 -33.45000) Bogota
Regards
First, I merge the two data frames with a cross join. Then I compute the distance between each pair of points using `map` in Python. I use `map` because most of the time it is much faster than `apply`, `itertuples`, `iterrows`, etc. (Reference: https://stackoverflow.com/a/52674448/8205554)
Lastly, I group the merged data frame by city and keep the row with the minimum distance in each group.
Here are the libraries,
import pandas as pd
import geopandas
import geopy.distance
from math import radians, cos, sin, asin, sqrt
Here are the functions used,
def dist1(p1, p2):
    """Return the haversine (great-circle) distance between two points in km.

    Args:
        p1: Object exposing ``x`` (longitude) and ``y`` (latitude) in degrees.
        p2: Second point, same layout as ``p1``.

    Returns:
        The distance as a float, using an Earth radius of 6373 km.
    """
    lon1, lat1, lon2, lat2 = map(radians, (p1.x, p1.y, p2.x, p2.y))
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Floating-point rounding can push ``a`` a hair above 1 for antipodal
    # points; clamp so asin() never receives a value outside its domain.
    c = 2 * asin(min(1.0, sqrt(a)))
    # 6373 is the Earth radius in km chosen by the original author.
    return c * 6373
def dist2(p1, p2):
    """Return the haversine (great-circle) distance between two points in km.

    Args:
        p1: Sequence whose item 0 is the longitude and item 1 the latitude,
            both in degrees.
        p2: Second point, same layout as ``p1``.

    Returns:
        The distance as a float, using an Earth radius of 6373 km.
    """
    lon1, lat1, lon2, lat2 = map(radians, (p1[0], p1[1], p2[0], p2[1]))
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Floating-point rounding can push ``a`` a hair above 1 for antipodal
    # points; clamp so asin() never receives a value outside its domain.
    c = 2 * asin(min(1.0, sqrt(a)))
    # 6373 is the Earth radius in km chosen by the original author.
    return c * 6373
def dist3(p1, p2):
    """Return the geopy geodesic distance in km between two points.

    Both arguments are objects exposing ``x`` and ``y`` attributes; each is
    handed to geopy as a ``(y, x)`` pair.
    """
    start = (p1.y, p1.x)
    end = (p2.y, p2.x)
    return geopy.distance.geodesic(start, end).km
def dist4(p1, p2):
    """Return the geopy geodesic distance in km between two points.

    Both arguments are indexable pairs; items are swapped so that item 1
    comes first when each pair is handed to geopy.
    """
    start = (p1[1], p1[0])
    end = (p2[1], p2[0])
    return geopy.distance.geodesic(start, end).km
And the data,
# Sample input: two lists of city records (name, country, coordinates in degrees).
# Each city in city1 is matched against the cities in city2 further down.
# NOTE(review): Brasilia and Santiago share the same longitude (-70.66) here --
# confirm that this is the intended value for Brasilia.
# NOTE(review): 'Chile ' and 'Colombia ' carry a trailing space -- presumably a typo.
city1 = [
{
'City': 'Buenos Aires',
'Country': 'Argentina',
'Latitude': -34.58,
'Longitude': -58.66
},
{
'City': 'Brasilia',
'Country': 'Brazil',
'Latitude': -15.78,
'Longitude': -70.66
},
{
'City': 'Santiago',
'Country': 'Chile ',
'Latitude': -33.45,
'Longitude': -70.66
}
]
city2 = [
{
'City': 'Bogota',
'Country': 'Colombia ',
'Latitude': 4.6,
'Longitude': -74.08
},
{
'City': 'Caracas',
'Country': 'Venezuela',
'Latitude': 10.48,
'Longitude': -66.86
}
]
# Plain pandas frames; these are also the inputs of the pandas-only variant.
city1df = pd.DataFrame(city1)
city2df = pd.DataFrame(city2)
Cross join with `geopandas` data frames,
# Build one point per row; points_from_xy is given x = Longitude, y = Latitude.
gcity1df = geopandas.GeoDataFrame(
city1df,
geometry=geopandas.points_from_xy(city1df.Longitude, city1df.Latitude)
)
gcity2df = geopandas.GeoDataFrame(
city2df,
geometry=geopandas.points_from_xy(city2df.Longitude, city2df.Latitude)
)
# cross join geopandas: a constant 'key' column on both sides makes the merge
# pair every gcity1df row with every gcity2df row. Columns present in both
# frames are later accessed with _x / _y suffixes (e.g. geometry_x, geometry_y).
gcity1df['key'] = 1
gcity2df['key'] = 1
merged = gcity1df.merge(gcity2df, on='key')
Using `math` functions with `geopandas`,
# 6.64 ms ± 588 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
%%timeit
# find distance: apply dist1 to every (geometry_x, geometry_y) pair of the cross join
merged['dist'] = list(map(dist1, merged['geometry_x'], merged['geometry_y']))
# Maps the suffixed merge columns to the output names; the dict order is also
# the column order of the final selection.
mapping = {
'City_x': 'City',
'Country_x': 'Country',
'Latitude_x': 'Latitude',
'Longitude_x': 'Longitude',
'geometry_x': 'geometry',
'City_y': 'Nearest',
'dist': 'Distance'
}
# idxmin gives, per (City_x, Country_x) group, the row label with the smallest
# distance; .loc then pulls exactly those rows out of the cross join.
nearest = merged.loc[merged.groupby(['City_x', 'Country_x'])['dist'].idxmin()]
# Rename and keep only the mapped columns.
nearest.rename(columns=mapping)[list(mapping.values())]
City Country Latitude Longitude geometry \
2 Brasilia Brazil -15.78 -70.66 POINT (-70.66000 -15.78000)
0 Buenos Aires Argentina -34.58 -58.66 POINT (-58.66000 -34.58000)
4 Santiago Chile -33.45 -70.66 POINT (-70.66000 -33.45000)
Nearest Distance
2 Bogota 2297.922808
0 Bogota 4648.004515
4 Bogota 4247.586882
Using `geopy` with `geopandas`,
# 9.99 ms ± 764 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
%%timeit
# find distance: apply dist3 to every (geometry_x, geometry_y) pair of the cross join
merged['dist'] = list(map(dist3, merged['geometry_x'], merged['geometry_y']))
# Maps the suffixed merge columns to the output names; the dict order is also
# the column order of the final selection.
mapping = {
'City_x': 'City',
'Country_x': 'Country',
'Latitude_x': 'Latitude',
'Longitude_x': 'Longitude',
'geometry_x': 'geometry',
'City_y': 'Nearest',
'dist': 'Distance'
}
# idxmin gives, per (City_x, Country_x) group, the row label with the smallest
# distance; .loc then pulls exactly those rows out of the cross join.
nearest = merged.loc[merged.groupby(['City_x', 'Country_x'])['dist'].idxmin()]
# Rename and keep only the mapped columns.
nearest.rename(columns=mapping)[list(mapping.values())]
City Country Latitude Longitude geometry \
2 Brasilia Brazil -15.78 -70.66 POINT (-70.66000 -15.78000)
0 Buenos Aires Argentina -34.58 -58.66 POINT (-58.66000 -34.58000)
4 Santiago Chile -33.45 -70.66 POINT (-70.66000 -33.45000)
Nearest Distance
2 Bogota 2285.239605
0 Bogota 4628.641817
4 Bogota 4226.710978
If you want to use `pandas` instead of `geopandas`,
# cross join pandas
# A constant 'key' column on both frames makes the merge pair every city1df
# row with every city2df row (a cross join).
city1df['key'] = 1
city2df['key'] = 1
merged = city1df.merge(city2df, on='key')
With `math` functions,
# 8.65 ms ± 2.21 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)
%%timeit
# find distance: dist2 receives one [Longitude, Latitude] array row per side
merged['dist'] = list(
map(
dist2,
merged[['Longitude_x', 'Latitude_x']].values,
merged[['Longitude_y', 'Latitude_y']].values
)
)
# Maps the suffixed merge columns to the output names; the dict order is also
# the column order of the final selection.
mapping = {
'City_x': 'City',
'Country_x': 'Country',
'Latitude_x': 'Latitude',
'Longitude_x': 'Longitude',
'City_y': 'Nearest',
'dist': 'Distance'
}
# idxmin gives, per (City_x, Country_x) group, the row label with the smallest
# distance; .loc then pulls exactly those rows out of the cross join.
nearest = merged.loc[merged.groupby(['City_x', 'Country_x'])['dist'].idxmin()]
# Rename and keep only the mapped columns.
nearest.rename(columns=mapping)[list(mapping.values())]
City Country Latitude Longitude Nearest Distance
2 Brasilia Brazil -15.78 -70.66 Bogota 2297.922808
0 Buenos Aires Argentina -34.58 -58.66 Bogota 4648.004515
4 Santiago Chile -33.45 -70.66 Bogota 4247.586882
With `geopy`,
# 9.8 ms ± 807 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
%%timeit
# find distance: dist4 receives one [Longitude, Latitude] array row per side
merged['dist'] = list(
map(
dist4,
merged[['Longitude_x', 'Latitude_x']].values,
merged[['Longitude_y', 'Latitude_y']].values
)
)
# Maps the suffixed merge columns to the output names; the dict order is also
# the column order of the final selection.
mapping = {
'City_x': 'City',
'Country_x': 'Country',
'Latitude_x': 'Latitude',
'Longitude_x': 'Longitude',
'City_y': 'Nearest',
'dist': 'Distance'
}
# idxmin gives, per (City_x, Country_x) group, the row label with the smallest
# distance; .loc then pulls exactly those rows out of the cross join.
nearest = merged.loc[merged.groupby(['City_x', 'Country_x'])['dist'].idxmin()]
# Rename and keep only the mapped columns.
nearest.rename(columns=mapping)[list(mapping.values())]
City Country Latitude Longitude Nearest Distance
2 Brasilia Brazil -15.78 -70.66 Bogota 2285.239605
0 Buenos Aires Argentina -34.58 -58.66 Bogota 4628.641817
4 Santiago Chile -33.45 -70.66 Bogota 4226.710978
If you like our work, you can donate via PayPal or buy us a coffee to help us maintain and grow the site. Thank you!
Donate Us With