Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

matplotlib pandas: Change width between datapoints based on another columns value

Tried my best to bring it in one picture:

matplotlib-set-width-based-on-column

I would like to increase the width of the lines between two datapoints that have a higher z value. Painting them red when they go over a certain threshold would be nice.

  1. Is that possible with matplotlib?
  2. Or with an alternative library which works on pd.DataFrame()?
  3. Are there certain limits to keep in mind when plotting this way with bigger datasets?

Example mock up:

import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Time axis: one-second steps covering the minute that starts now.
date_today = datetime.now()
days = pd.date_range(
    start=date_today,
    end=date_today + timedelta(minutes=1),
    freq='s',
)

# Fixed seed so the random mock data is the same on every run.
np.random.seed(seed=1111)
y = np.random.randint(10, high=13, size=len(days))  # values to plot
z = np.random.randint(1, high=10, size=len(days))   # should drive the line width
df = pd.DataFrame({'ts': days, 'y': y, 'z': z}).set_index('ts')
print(df)

# Baseline: a plain line with uniform width.
df['y'].plot()
like image 456
gies0r Avatar asked May 30 '26 12:05

gies0r


2 Answers

Here is another approach, based on using a LineCollection like in this example.

import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
from matplotlib.dates import DateFormatter, date2num, SecondLocator

# Same mock data as in the question: one sample per second for one minute.
date_today = datetime.now()
days = pd.date_range(
    start=date_today,
    end=date_today + timedelta(minutes=1),
    freq='s',
)

np.random.seed(seed=1111)
y = np.random.randint(10, high=13, size=len(days))
z = np.random.randint(1, high=10, size=len(days))
df = pd.DataFrame({'ts': days, 'y': y, 'z': z}).set_index('ts')

# One (x, y) row per sample; timestamps become Matplotlib date numbers
xy = np.column_stack((date2num(df.index), y))
points = xy[:, np.newaxis, :]
# Pair every sample with its successor -> array of shape (n - 1, 2, 2)
segments = np.stack((xy[:-1], xy[1:]), axis=1)

# Per-segment styling:
#   colour - red where z > 5, blue otherwise
#   width  - 2 where z did not drop relative to the previous sample, else 1
# NOTE(review): the colour array has one entry per sample, i.e. one more than
# there are segments; presumably the surplus last entry is simply unused -
# confirm against the Collection documentation.
segment_colors = np.where(z > 5, 'red', 'blue')
segment_widths = np.where(np.diff(z) >= 0, 2, 1)
lc = LineCollection(segments, color=segment_colors, linewidth=segment_widths)

# Attach the collection, then rescale the axes so the lines are in view
fig, ax = plt.subplots()
ax.add_collection(lc)
ax.autoscale()

# Tick every 10 seconds, labelled as HH:MM:SS
ax.xaxis.set_major_locator(SecondLocator(bysecond=range(0, 60, 10)))
ax.xaxis.set_major_formatter(DateFormatter('%H:%M:%S'))
plt.show()

This gives the following plot enter image description here

like image 124
tomjn Avatar answered Jun 02 '26 21:06

tomjn


Here's how I would do it: basically I'm adding conditional columns (df['linewidth'], df['linecolors']) to the dataframe, which hold the plot options for plotting line segments, and plot the data as line segments (i.e. not a continuous line), based on this answer to a similar question

import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

date_today = datetime.now()
days = pd.date_range(
    start=date_today,
    end=date_today + timedelta(minutes=1),
    freq='s',
)

## The question's random mock data (seed 1111, y = randint(10, 13),
#  z = randint(1, 10)) is swapped for a cosine here, which makes it easier
#  to see that the code is working.
y = np.cos(np.linspace(0, 4 * np.pi, len(days)))
z = y

df = pd.DataFrame({'ts': days, 'y': y, 'z': z}).set_index('ts')

## plot options: "thick" applies at or above the threshold, "thin" below it
threshold = -.25
thick_linewidth, thin_linewidth = 5, 1
thick_color, thin_color = "r", "b"

## evaluate the threshold test once and derive every per-point column from it
above_threshold = df['z'] >= threshold
df['threshold_bool'] = np.where(above_threshold, 1, 0)  # debug aid only, not required
df['linewidth'] = np.where(above_threshold, thick_linewidth, thin_linewidth)
df['linecolors'] = np.where(above_threshold, thick_color, thin_color)

def plot_widths(xs, ys, widths, colors, ax=None, xlim=None, ylim=None,
                debug=True, debug_labels=None, **kwargs):
    """Draw ``ys`` over ``xs`` as consecutive runs of constant line style.

    Neighbouring points that share the same width and colour are drawn with a
    single ``ax.plot`` call; a new run starts wherever either value changes.
    The point at which the style changes closes the old run and opens the
    next one, so consecutive runs join without a gap.

    Parameters
    ----------
    xs, ys : iterable
        Point coordinates, in plotting order.
    widths, colors : iterable
        Line width and colour for each point. Entries are compared with
        ``!=``, so they should be scalars/strings (not arrays).
    ax : axes, optional
        Axes to draw on. When omitted, a new figure and axes are created.
    xlim, ylim : pair, optional
        Axis limits. Default to ``[min, max]`` of ``xs`` / ``ys``.
    debug : bool, default True
        Draw a white marker on every point (the visual debugging aid of the
        original snippet). Pass ``False`` for a clean plot.
    debug_labels : iterable, optional
        Text written into the debug markers, one entry per point. When
        omitted, the module-level ``df['threshold_bool']`` is used if it
        exists (this keeps the original script's figure unchanged);
        otherwise the markers stay unlabelled.
    **kwargs
        Forwarded to every ``ax.plot`` call.

    Returns
    -------
    The axes that were passed in, or the newly created figure when ``ax``
    was omitted.

    Raises
    ------
    ValueError
        If ``xs``, ``ys``, ``widths`` and ``colors`` differ in length.
    """
    # Materialise everything as plain lists: indexing is then positional no
    # matter what index a pandas Series carries, and non-pandas inputs work.
    xs, ys = list(xs), list(ys)
    widths, colors = list(widths), list(colors)
    if not (len(xs) == len(ys) == len(widths) == len(colors)):
        raise ValueError(
            'xs, ys, widths, and colors must have identical lengths')

    fig = None
    if ax is None:
        fig, ax = plt.subplots(1)

    if debug and xs:
        ax.scatter(xs, ys, edgecolors="k", facecolors="w", marker='o',
                   s=12**2, zorder=19)
        if debug_labels is None:
            # Backward compatibility: the original snippet always read the
            # labels from the script's global DataFrame.
            try:
                debug_labels = df['threshold_bool'].values
            except (NameError, KeyError):
                debug_labels = ()
        for x, y, label in zip(xs, ys, debug_labels):
            ax.text(x, y, label, ha="center", va="center", zorder=20)

    segmentx, segmenty = [], []
    current_style = (widths[0], colors[0]) if xs else None
    last = len(xs) - 1
    for ii, (x, y, width, color) in enumerate(zip(xs, ys, widths, colors)):
        segmentx.append(x)
        segmenty.append(y)
        if (width, color) != current_style or ii == last:
            # Flush the finished run in the style it started with ...
            ax.plot(segmentx, segmenty, linewidth=current_style[0],
                    color=current_style[1], **kwargs)
            # ... and start the next run at the same point so they connect.
            segmentx, segmenty = [x], [y]
            current_style = (width, color)

    # Default limits need at least one point; with no data only explicitly
    # requested limits are applied.
    if xs:
        if xlim is None:
            xlim = [min(xs), max(xs)]
        if ylim is None:
            ylim = [min(ys), max(ys)]
    if xlim is not None:
        ax.set_xlim(xlim)
    if ylim is not None:
        ax.set_ylim(ylim)

    return ax if fig is None else fig

# Draw the styled line on a fresh axes and mark the threshold for reference.
fig, ax = plt.subplots()
plot_widths(
    df.index,
    df['y'],
    df['linewidth'],
    df['linecolors'],
    ax=ax,
)
ax.axhline(y=threshold, color="r", linestyle="dashed")
plt.show()

Which yields:

Variable line widths

Note that I added scatter points & text via df['threshold_bool'], in order to highlight how the code decides whether a segment is above or below the threshold, see comments in the code.

like image 29
Asmus Avatar answered Jun 02 '26 20:06

Asmus



Donate For Us

If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow. Thank you!