I have a pandas dataframe containing data on Facebook posts, broken down by "type of post." The dataframe is called "Posts_by_type". It contains the # of likes, the # of shares, and the type of post. There are 3 types of post: Racing, Entertainment, and Promo.
I want to create a boxplot in matplotlib showing the # of Likes for each type of post.
My code works:
Posts_by_type.boxplot(column='Likes', by='Type', grid=True)
This produces the following boxplot:
HOWEVER, I also want to label the median and the whiskers on the boxplot with the corresponding numeric values.
Is this possible in matplotlib? If so, can anyone give me some pointers on how to do it?
Here is a solution that also adds the values for the boxes.
import random
import string
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
def get_x_tick_labels(df, grouped_by):
    """Build one x-tick label per group, formatted as ``"<group>: <rows>"``.

    Args:
        df: DataFrame holding the plotted data.
        grouped_by: Name of the column the rows are grouped by.

    Returns:
        A list of strings, one per distinct value of ``grouped_by``, in the
        order of the group-size result.
    """
    group_sizes = df.groupby([grouped_by]).size()
    return ["{0}: {1}".format(group, count)
            for group, count in group_sizes.to_dict().items()]
def series_values_as_dict(series_object):
    """Return the first value stored in ``series_object``.

    The script uses this to pull the boxplot artist dict out of the Series
    that ``DataFrame.boxplot(..., by=..., return_type='dict')`` hands back.

    Args:
        series_object: Any object with a ``to_dict()`` method (a pandas
            Series in this script).

    Returns:
        The first value of ``series_object.to_dict()``.

    Raises:
        IndexError: If ``series_object`` holds no values.
    """
    values = list(series_object.to_dict().values())
    return values[0]
def generate_dataframe(n_rows=100):
    """Create a DataFrame of random sample data for the boxplot demo.

    Args:
        n_rows: Number of rows to generate. Defaults to 100.

    Returns:
        A DataFrame with three columns:
        ``'Likes'`` (random ints in 0..300), ``'Type'`` (a random letter
        from ``'A'`` to ``'E'``) and ``'shares'`` (random ints in 0..100).
    """
    type_choices = string.ascii_uppercase[:5]
    likes = [random.randint(0, 300) for _ in range(n_rows)]
    types = [random.choice(type_choices) for _ in range(n_rows)]
    shares = [random.randint(0, 100) for _ in range(n_rows)]
    return pd.DataFrame({
        'Likes': likes,
        'Type': types,
        'shares': shares,
    })
def add_values(bp, ax, *, fmt='%.3f', fontsize=16):
    """Write the numeric y-value next to each whisker, median and cap line.

    Args:
        bp: Mapping with the keys ``'whiskers'``, ``'medians'`` and
            ``'caps'``; each value is an iterable of line objects exposing
            ``get_xydata()`` that yields exactly two ``(x, y)`` points.
        ax: Object whose ``text()`` method draws the labels (the axes the
            boxplot was drawn on).
        fmt: %-style format applied to the value. Defaults to three decimals.
        fontsize: Font size of the labels.
    """
    for element in ('whiskers', 'medians', 'caps'):
        for line in bp[element]:
            # The y of the line's first point is the value that gets printed.
            (x_l, y), (x_r, _) = line.get_xydata()
            # Skip lines without data (e.g. empty groups produce NaN).
            if np.isnan(y):
                continue
            # Anchor the text at the horizontal centre of the line so the
            # label runs from the centre towards the right.
            x_line_center = x_l + (x_r - x_l) / 2
            ax.text(x_line_center, y,
                    fmt % y,
                    verticalalignment='center',  # centred vertically on the line
                    fontsize=fontsize, backgroundcolor="white")
# Build the sample data and draw one box per post type on a single axes.
posts_by_type = generate_dataframe()
fig, axes = plt.subplots(1, figsize=(20, 10))
# No ``labels=`` argument here: the tick labels are only built further down
# and are applied with plt.xticks(); referencing ``labels`` at this point
# would raise a NameError because the name is not defined yet.
bp_series = posts_by_type.boxplot(column='Likes', by='Type',
                                  grid=True, figsize=(25, 10),
                                  ax=axes, return_type='dict')
# With ``by=`` the call hands back a Series wrapping the artist dict rather
# than the dict itself, so unwrap its first value.
bp_dict = series_values_as_dict(bp_series)
# Now add the values
add_values(bp_dict, axes)
# Set a label on X-axis for each boxplot
labels = get_x_tick_labels(posts_by_type, 'Type')
plt.xticks(range(1, len(labels) + 1), labels)
# Change some other texts on the graphs?
plt.title('Likes per type of post', fontsize=22)
plt.xlabel('Type', fontsize=18)
plt.ylabel('Likes', fontsize=18)
plt.suptitle('This is a pretty graph')
plt.show()
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With