I drew a scatter plot using pandas and matplotlib, and I want to connect each pair of markers that share the same name.
I want the color of each connecting line to vary with the distance between the two markers of the same name.
I also want the thickness of each line to vary with that same distance.
How should I modify the code to make it work the way I want it to?
My code is:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# First data set: one row per named point ('a'..'k'), indexed by that name.
df = pd.DataFrame({
    'population': [673507, 341649, 408147, 603611, 548160, 322915,
                   371063, 409385, 431920, 386359, 364338],
    'number': [1586, 858, 1068, 1617, 1906, 1265,
               1581, 1780, 1879, 1935, 2003],
    'error': [1408.212414, 1189.619423, 1169.298565, 1177.840805,
              730.672166, 729.182492, 550.520026, 460.829841,
              426.108759, 240.150393, 109.337611],
    'index': ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k'],
    'ratio': [0.235484, 0.251135, 0.261670, 0.267888, 0.347709, 0.391744,
              0.426073, 0.434799, 0.435034, 0.500830, 0.549764],
}).set_index('index')

# Second data set: same names and columns, different measurements.
df1 = pd.DataFrame({
    'population': [667480, 353241, 418620, 595691, 578221, 334426,
                   375180, 412774, 463321, 398351, 373824],
    'number': [974, 515, 872, 909, 1342, 691,
               746, 993, 1027, 1009, 1437],
    'error': [835.205610, 672.395313, 444.766064, 758.150870,
              290.581558, 459.164541, 484.807799, 312.198108,
              378.219572, 267.658143, 208.875429],
    'index': ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k'],
    'ratio': [0.145922, 0.145793, 0.208303, 0.152596, 0.232091, 0.206623,
              0.198838, 0.240567, 0.221661, 0.253294, 0.384405],
}).set_index('index')
# Least-squares straight-line (degree 1) fit of 'number' against 'population'
# for each data set. np.poly1d wraps the coefficients in a callable, so
# fy(x) / fy1(x) evaluate the fitted line at x.
pf = np.polyfit(df['population'], df['number'], 1)
fy = np.poly1d(pf)
pf1 = np.polyfit(df1['population'], df1['number'], 1)
fy1 = np.poly1d(pf1)

# x positions (100 evenly spaced values) at which the fitted lines are drawn.
fx = np.linspace(100000, 700000, 100)
fx1 = np.linspace(100000, 700000, 100)
def _ordered_by_name(frame):
    """Return a copy of *frame* sorted by its 'index' labels, renumbered 0..n-1.

    The name labels are dropped from the result, so row i of two frames
    processed this way refers to the same name as long as both frames hold
    the same set of names.
    """
    return (
        frame.reset_index()
        .sort_values(by='index', ascending=True)
        .drop(['index'], axis=1)
        .reset_index(drop=True)
    )


# x*/y* hold the coordinates of the line end points: "1" comes from df1,
# "2" from df. The x and y variants of each pair carry identical content.
x1 = _ordered_by_name(df1)
y1 = _ordered_by_name(df1)
x2 = _ordered_by_name(df)
y2 = _ordered_by_name(df)
def _label_points(frame):
    """Write each row's name next to its marker, nudged right and slightly down."""
    # Iterate the values directly: frame['population'][n] with an integer n on
    # a string-labelled index depends on pandas' deprecated positional fallback.
    for name, population, number in zip(frame.index, frame['population'], frame['number']):
        plt.text(population + 5000, number - 20, name, fontsize=15)


plt.figure(figsize=(14, 10))

# First data set: marker color encodes 'error', marker area scales with 'ratio'.
plt.scatter(df['population'], df['number'], c=df['error'], s=2000*df['ratio'])
plt.plot(fx1, fy1(fx1), ls='solid', lw=3, color='g')
_label_points(df)

# Second data set, drawn half-transparent so the two sets can be told apart.
plt.scatter(df1['population'], df1['number'], c=df1['error'], s=2000*df1['ratio'], alpha=0.5)
plt.plot(fx, fy(fx), ls='dashed', lw=3, color='g', alpha=0.3)
_label_points(df1)

plt.colorbar()

# Each argument is a pair of equally long columns (start points, end points);
# plot() draws one separate line per column, i.e. one segment per name.
plt.plot([x1['population'], x2['population']], [y1['number'], y2['number']], c='r', linewidth=10, alpha=0.5)

plt.title('Plot')
plt.xlabel('Population')
plt.ylabel('Number')
plt.ylim(0, 6000)
plt.grid()
plt.show()
result
The return value of the last plot command is a list of the individual lines that were drawn. (Optionally, the z-order can be set to zero so that the lines appear behind the scatter dots. Leaving out the alpha makes the lines more visible.)
np.linalg.norm called on the difference between the coordinates can calculate the lengths of the lines.
plt.Normalize(vmin=..., vmax=...) creates a function that linearly maps its input values such that vmin will be mapped to zero and vmax to 1. This can be used to convert a length to a value between 0 and 1 for a colormap. It also can be used to calculate a variable thickness.
Looping through the lines and their respective lengths can set a specific color and thickness:
# plot() returns one line object per connected pair; zorder=0 keeps the
# lines underneath the scatter markers.
lines = plt.plot([x1['population'], x2['population']], [y1['number'], y2['number']], zorder=0)

# Euclidean length of every segment in data coordinates: stack the x and y
# differences as two rows and take the norm of each column.
lengths = np.linalg.norm(np.vstack([x1['population'] - x2['population'], y1['number'] - y2['number']]), axis=0)

# Linearly map the shortest length to 0 and the longest to 1.
norm = plt.Normalize(vmin=lengths.min(), vmax=lengths.max())
cmap = plt.get_cmap('coolwarm')
for line, length in zip(lines, lengths):
    line.set_color(cmap(norm(length)))
    # Width grows from 2 (shortest segment) to 11 (longest segment).
    line.set_linewidth(2 + 9 * norm(length))

If you love us, you can donate via PayPal or buy us a coffee so we can maintain and grow the site. Thank you!
Donate Us With