matplotlib pandas: Change width between datapoints based on another columns value

Question

Tried my best to bring it in one picture:

matplotlib-set-width-based-on-column

I would like to increase the width of the line between two datapoints when they have a higher z value. Painting those segments red when z goes over a certain threshold would be nice, too.

Is that possible with matplotlib?
Or with an alternative library which works on pd.DataFrame()?
Are there certain limits to keep in mind when plotting this way with bigger datasets?

Example mock up:

import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# One sample per second over a one-minute window that starts now.
date_today = datetime.now()
window_end = date_today + timedelta(minutes=1)
days = pd.date_range(start=date_today, end=window_end, freq='s')
n_samples = len(days)

# Fixed seed so the mock data is reproducible between runs.
np.random.seed(1111)
y = np.random.randint(10, 13, size=n_samples)  # plotted value
z = np.random.randint(1, 10, size=n_samples)   # value that should drive the line width

# Index the frame by timestamp so the x-axis of the plot is time.
df = pd.DataFrame({'ts': days, 'y': y, 'z': z}).set_index('ts')
print(df)

# Plain line plot of y; this is the plot whose segment widths should vary with z.
df['y'].plot()

tomjn · Accepted Answer

Here is another approach, based on using a LineCollection like in this example.

import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
from matplotlib.dates import DateFormatter, date2num, SecondLocator

# Build a one-minute time axis, one sample per second, starting now.
date_today = datetime.now()
days = pd.date_range(date_today, date_today + timedelta(minutes=1), freq='s')

# Reproducible mock data: y is the plotted value, z drives the styling.
np.random.seed(seed=1111)
y = np.random.randint(10, high=13, size=len(days))
z = np.random.randint(1, high=10, size=len(days))
df = pd.DataFrame({'ts': days, 'y': y, 'z': z})
df = df.set_index('ts')

# Make each line segment: `points` has shape (N, 1, 2) holding (x, y) pairs
# with x converted to matplotlib date numbers; pairing every point with its
# successor gives N - 1 segments of two points each.
points = np.array([date2num(df.index), y]).T.reshape(-1, 1, 2)
segments = np.concatenate([points[:-1], points[1:]], axis=1)

# Make the line collection. Both style arrays hold exactly one entry per
# segment (N - 1 entries):
#   * color: red if z at the segment's start point is > 5, otherwise blue
#   * linewidth: 2 if z at the end point >= z at the start point, otherwise 1
# Slicing z to z[:-1] for the colors keeps the array the same length as
# `segments` instead of relying on the collection ignoring a surplus entry.
lc = LineCollection(
    segments,
    color=np.where(z[:-1] > 5, 'red', 'blue'),
    linewidth=np.where(z[1:] >= z[:-1], 2, 1),
)

# Add it to the plot and auto scale axes to take lines into account
# (add_collection alone does not update the view limits).
fig, ax = plt.subplots()
ax.add_collection(lc)
ax.autoscale()

# Format the x-axis nicely: a tick every 10 seconds, labelled HH:MM:SS.
ax.xaxis.set_major_locator(SecondLocator(range(0, 60, 10)))
ax.xaxis.set_major_formatter(DateFormatter('%H:%M:%S'))
plt.show()

This gives the following plot enter image description here

Asmus · Answer

Here's how I would do it: basically, I'm adding conditional columns (df['linewidth'], df['linecolors']) to the dataframe, which hold the plot options for the line segments, and then plotting the data as line segments (i.e. not a continuous line), based on this answer to a similar question.

import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

# Time axis: one sample per second for one minute, starting now.
date_today = datetime.now()
one_minute_later = date_today + timedelta(minutes=1)
days = pd.date_range(start=date_today, end=one_minute_later, freq='s')

## The random mock data from the question is replaced by a cosine here,
#  in order to make it easier to see that the code is working.
#  The original random data was generated with:
# np.random.seed(seed=1111)
# y = np.random.randint(10, high=13, size=len(days))
# z = np.random.randint(1, high=10, size=len(days))
y = np.cos(np.linspace(0, 4 * np.pi, len(days)))
z = y  # the styling column is the plotted value itself

df = pd.DataFrame({'ts': days, 'y': y, 'z': z}).set_index('ts')

## Plot options: rows with z at or above the threshold get the "thick"
## style, all other rows get the "thin" style.
threshold = -.25
thick_linewidth = 5
thin_linewidth = 1
thick_color = "r"
thin_color = "b"

## Evaluate the threshold once and derive one column per plot option from it.
above_threshold = df['z'] >= threshold
df['threshold_bool'] = np.where(above_threshold, 1, 0)  ### only for debug, you don't really need this
df['linewidth'] = np.where(above_threshold, thick_linewidth, thin_linewidth)
df['linecolors'] = np.where(above_threshold, thick_color, thin_color)

def plot_widths(xs, ys, widths, colors, ax=None, xlim=None, ylim=None,
                **kwargs):
    """Plot ``ys`` against ``xs`` as a line whose width and colour vary.

    Consecutive points that share the same width are drawn with a single
    ``ax.plot`` call. A new run starts whenever the width changes; it takes
    the width and the colour given for the point at which the change occurs.

    Parameters
    ----------
    xs, ys : sequences of equal length
        Point coordinates (e.g. a DatetimeIndex and a Series).
    widths : sequence
        Line width associated with each point.
    colors : sequence
        Line colour associated with each point.
    ax : matplotlib Axes, optional
        Axes to draw on. A new figure and Axes are created when omitted.
    xlim, ylim : pair, optional
        Axis limits; default to the min/max of ``xs`` / ``ys``.
    **kwargs
        Forwarded unchanged to every ``ax.plot`` call.

    Returns
    -------
    The Axes that was passed in, or the newly created Figure when ``ax``
    was None.

    Raises
    ------
    ValueError
        If ``xs``, ``ys``, ``widths`` and ``colors`` differ in length.
    """
    # Materialise the inputs as lists so that [0] is always positional.
    # Integer indexing on a Series with a non-integer index (here a
    # DatetimeIndex) relies on a deprecated positional fallback in pandas.
    xs_list, ys_list = list(xs), list(ys)
    widths_list, colors_list = list(widths), list(colors)

    if not (len(xs_list) == len(ys_list)
            == len(widths_list) == len(colors_list)):
        raise ValueError(
            'xs, ys, widths, and colors must have identical lengths')
    fig = None
    if ax is None:
        fig, ax = plt.subplots(1)

    # Nothing to draw (and no min/max to take) for empty input.
    if not xs_list:
        return ax if fig is None else fig

    ## to debug the visualisation use:
    # NOTE: the labels come from the module-level ``df['threshold_bool']``
    # column, not from the arguments; remove this section to make the
    # function independent of that global.
    ax.scatter(np.asarray(xs), np.asarray(ys), edgecolors="k",
               facecolors="w", marker='o', s=12**2, zorder=19)
    for (x, y, z) in zip(np.asarray(xs), np.asarray(ys),
                         df['threshold_bool'].values):
        ax.text(x, y, z, ha="center", va="center", zorder=20)
    #####

    # The current run starts at the first point. The loop appends that point
    # again on its first iteration, which is harmless for the drawn line.
    segmentx, segmenty = [xs_list[0]], [ys_list[0]]
    current_width = widths_list[0]
    color = colors_list[0]
    last_index = len(xs_list) - 1

    for ii, (x, y, width, point_color) in enumerate(
            zip(xs_list, ys_list, widths_list, colors_list)):
        segmentx.append(x)
        segmenty.append(y)
        # Flush the run when the width changes or the data ends. The point
        # that triggered the flush closes the old run and opens the next
        # one, so adjacent runs stay visually connected.
        if (width != current_width) or (ii == last_index):
            ax.plot(segmentx, segmenty, linewidth=current_width, color=color,
                    **kwargs)
            segmentx, segmenty = [x], [y]

            current_width = width
            # Take the colour from the caller-supplied sequence rather than
            # from module-level style constants.
            color = point_color

    if xlim is None:
        xlim = [min(xs_list), max(xs_list)]
    if ylim is None:
        ylim = [min(ys_list), max(ys_list)]
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    return ax if fig is None else fig

# Draw the variable-width line on a fresh Axes, then mark the threshold
# with a dashed red horizontal line.
fig, ax = plt.subplots()
plot_widths(
    xs=df.index,
    ys=df['y'],
    widths=df['linewidth'],
    colors=df['linecolors'],
    ax=ax,
)
ax.axhline(y=threshold, linestyle="dashed", color="r")
plt.show()

Which yields:

Variable line widths

Note that I added scatter points & text via df['threshold_bool'], in order to highlight how the code decides whether a segment is above or below the threshold, see comments in the code.

matplotlib pandas: Change width between datapoints based on another columns value

Tags:

pandas

dataframe

matplotlib

data-visualization

gies0r

2 Answers

tomjn

Asmus

Recent Activity

Donate For Us

matplotlib pandas: Change width between datapoints based on another columns value

Tags:

pandas

dataframe

matplotlib

data-visualization

gies0r

2 Answers

tomjn

Asmus

Related questions

Recent Activity

Donate For Us