Boxplots

Date: February 21st 2016
Last updated: February 21st 2016

Import modules

import matplotlib.pyplot as plt
import numpy as np

Create data

# Three samples of different sizes, each drawn uniformly from [0, scale).
# The (size, scale) pairs are consumed in order, so d1 is drawn first,
# then d2, then d3.
d1, d2, d3 = (
    np.random.rand(size) * scale
    for size, scale in ((25, 35), (30, 25), (15, 10))
)

# the examples below pass this list of arrays straight to boxplot()
data = [d1, d2, d3]

Example 1: Basic boxplot: default

plt.figure()  # start a new, empty figure
plt.boxplot(data)  # draw the boxplot with no styling arguments (all defaults)
plt.show()  # display the figure

Example 1: Default output
Example 1 basic boxplot

Example 2: Add more detail to boxplot

# note the change in calling plt to plt.subplots(): it returns the Figure and
# its Axes, so the plot is customised through methods on `ax`
fig, ax = plt.subplots()

# sym='k+' draws the fliers (outlier points) as black plus signs;
# boxplot() returns a dict of the artists it created, keyed by part name
bp = ax.boxplot(data, sym='k+')

# axis labels
ax.set_xlabel('treatment')
ax.set_ylabel('response')

# draw the whiskers as solid black lines and shrink the flier markers
plt.setp(bp['whiskers'], color='k', linestyle='-')
plt.setp(bp['fliers'], markersize=3.0)

# add tick labels through the Axes as well, to stay consistent with the
# calls above; by default boxplot() places the boxes at x = 1, 2, 3
ax.set_xticks([1, 2, 3])
ax.set_xticklabels(['data1', 'data2', 'data3'])
plt.show()

Example 2: output
Example 2 boxplot

Example 3: boxplot of 2 factors for 3 treatments

# This example is adapted from a response given here: http://stackoverflow.com/questions/16592222/matplotlib-group-boxplots

# function to create data
def make_data(n_samples, max_multiplier, n_treatments):
    """Build one random sample per treatment.

    Each treatment gets its own scale, drawn uniformly from
    [0, max_multiplier); its n_samples values are then drawn uniformly
    from [0, scale).

    Returns a list of n_treatments NumPy arrays, each of length n_samples.
    """
    def one_treatment():
        # Draw the scale before the samples so the random stream is
        # consumed in a fixed order for every treatment.
        scale = np.random.rand(1) * max_multiplier
        return np.random.rand(n_samples) * scale

    return [one_treatment() for _ in range(n_treatments)]

# get the max value for setting y axis
# loop over arguments given to function
# then, loop over the lists in an array
# then, loop through numbers in the list
def get_max_value(*args):
    """Return the largest number found in the arguments, plus 10%.

    Each positional argument is an iterable of sequences of numbers
    (for example a list of arrays). The search starts from 0, so the
    result is never negative; with no numbers at all it is 0.
    """
    largest = 0
    # Flatten arguments -> sequences -> numbers into one stream. The names
    # deliberately avoid shadowing the builtin `list`.
    numbers = (
        number
        for group in args
        for sequence in group
        for number in sequence
    )
    for number in numbers:
        if number >= largest:
            largest = number
    # add 10% headroom so the tallest value is not drawn on the axis edge
    return largest * 1.1

# function to set color of each box
def set_box_color(bp, color):
    """Apply one colour to the boxes, whiskers, caps and medians of a boxplot.

    bp is indexed by those four part names; each entry is passed to
    plt.setp() together with the given color.
    """
    for part in ('boxes', 'whiskers', 'caps', 'medians'):
        plt.setp(bp[part], color=color)

# start a new figure
plt.figure()

# two groups of random data, three treatments each
data_a = make_data(50, 100, 3)
data_b = make_data(50, 80, 3)

# upper y limit: largest value across both groups, plus 10%
y_max = get_max_value(data_a, data_b)

# Treatment i is centred at x = 2*i; the first group sits 0.4 to the left of
# that centre and the second group 0.4 to the right.
left_positions = np.arange(len(data_a)) * 2.0 - 0.4
right_positions = np.arange(len(data_b)) * 2.0 + 0.4

# draw both groups of boxes; sym='' leaves the flier marker empty
bpl = plt.boxplot(data_a, positions=left_positions, sym='', widths=0.6)
bpr = plt.boxplot(data_b, positions=right_positions, sym='', widths=0.6)

# colors are from http://colorbrewer2.org/
set_box_color(bpl, '#D7191C')
set_box_color(bpr, '#2C7BB6')

# legend: plot an empty line per group so each colour gets a labelled entry
for group_color, group_label in (('#D7191C', 'Group1'), ('#2C7BB6', 'Group2')):
    plt.plot([], c=group_color, label=group_label)
plt.legend()

# one tick label per treatment, at the centre of each pair of boxes
ticks = ['data1', 'data2', 'data3']
plt.xticks(range(0, len(ticks) * 2, 2), ticks)

# axis limits
plt.xlim(-2, len(ticks) * 2)
plt.ylim(0, y_max)

# optional: plt.tight_layout()
plt.show()

Example 3: group boxplot output
Example 3 boxplot

Example 4: complete figure with labels

# NOTE: each `...` below is a placeholder for code omitted from this listing;
# presumably the matching steps from Example 3 go here (confirm against the
# original tutorial). The statements after this skeleton are shown in full.

# create figure
plt.figure()
# create data
...
# get y max (+10%)
...
# tick labels
...
# Create boxes
...
# set box color
...
# create a legend
...
# set ticks and axis limits
...

# main title
plt.suptitle('My example boxplot', fontsize=24)

# x and y labels
plt.xlabel('Treatment', fontsize=20)
plt.ylabel('Response', fontsize=20)

# Set the margins around the axes by hand (plt.tight_layout() is the
# automatic alternative). left/right/bottom/top are edge positions given as
# fractions of the figure size, so they must lie between 0 and 1, with
# left < right and bottom < top; otherwise matplotlib raises ValueError.
# The values leave room for the large axis labels and the main title.
plt.subplots_adjust(left=0.15, bottom=0.15, right=0.95, top=0.88, wspace=0, hspace=0)

plt.show()

Example 4: complete boxplot output
Example 4 boxplot

Useful resources

results matching ""

    No results matching ""