import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%config InlineBackend.figure_format ='retina'
%load_ext nb_black
Matplotlib has two interfaces: the object-oriented interface, which renders Axes instances on Figure instances, and the less flexible, state-based MATLAB-style interface (pyplot), which keeps track of the current figure, axes and other objects and directs plotting functions accordingly (see the Matplotlib documentation on its application interfaces for more).
Object-oriented plot
# Load the "diamonds" dataset through seaborn's dataset loader.
df = sns.load_dataset("diamonds")
# Object-oriented interface: create the Figure and Axes explicitly, then call
# plotting and labelling methods on the Axes object.
fig, ax = plt.subplots()
# Column names are given as strings and looked up in `data`.
ax.scatter(x="carat", y="price", data=df)
ax.set(xlabel="Carat", ylabel="Price")
[Text(0.5, 0, 'Carat'), Text(0, 0.5, 'Price')]
pyplot version
# State-based pyplot interface: no Figure/Axes handles are created here; each
# plt.* call acts on the current figure and axes.
plt.scatter(x="carat", y="price", data=df)
plt.xlabel("Carat")
plt.ylabel("Price")
Text(0, 0.5, 'Price')
Read in raw customer sales transactions, then keep total sales and number of purchases for the top 10 customers by sales.
# Location of the sample sales workbook; the file name is appended to the
# repository's data directory to form the full download URL.
repo_data_dir = "https://github.com/chris1610/pbpython/blob/master/data/"
fp = f"{repo_data_dir}sample-salesv3.xlsx?raw=true"

# Read the workbook straight from the URL, then show its dimensions and the
# first two rows as a quick sanity check.
df_raw = pd.read_excel(fp)
print(df_raw.shape)
df_raw.head(2)
(1500, 7)
account number | name | sku | quantity | unit price | ext price | date | |
---|---|---|---|---|---|---|---|
0 | 740150 | Barton LLC | B1-20000 | 39 | 86.69 | 3380.91 | 2014-01-01 07:21:51 |
1 | 714466 | Trantow-Barrows | S2-77896 | -1 | 63.16 | -63.16 | 2014-01-01 10:00:47 |
# Per-customer totals: summed extended price ("sales") and the number of
# transaction rows ("purchases").
per_customer = df_raw.groupby("name").agg(
    sales=("ext price", "sum"),
    purchases=("quantity", "count"),
)

# Sort ascending by sales and keep the last ten rows, i.e. the ten customers
# with the highest sales, smallest first; move "name" back into a column.
df = per_customer.sort_values("sales").tail(10).reset_index()
df
name | sales | purchases | |
---|---|---|---|
0 | Keeling LLC | 100934.30 | 74 |
1 | Frami, Hills and Schmidt | 103569.59 | 72 |
2 | Koepp Ltd | 103660.54 | 82 |
3 | Will LLC | 104437.60 | 74 |
4 | Barton LLC | 109438.50 | 82 |
5 | Fritsch, Russel and Anderson | 112214.71 | 81 |
6 | Jerde-Hilpert | 112591.43 | 89 |
7 | Trantow-Barrows | 123381.38 | 94 |
8 | White-Trantow | 135841.99 | 86 |
9 | Kulas Inc | 137351.96 | 94 |
Choosing a style
# Names of the style sheets this Matplotlib installation offers.
plt.style.available
['Solarize_Light2', '_classic_test_patch', '_mpl-gallery', '_mpl-gallery-nogrid', 'bmh', 'classic', 'dark_background', 'fast', 'fivethirtyeight', 'ggplot', 'grayscale', 'seaborn', 'seaborn-bright', 'seaborn-colorblind', 'seaborn-dark', 'seaborn-dark-palette', 'seaborn-darkgrid', 'seaborn-deep', 'seaborn-muted', 'seaborn-notebook', 'seaborn-paper', 'seaborn-pastel', 'seaborn-poster', 'seaborn-talk', 'seaborn-ticks', 'seaborn-white', 'seaborn-whitegrid', 'tableau-colorblind10']
# Matplotlib 3.6 renamed the bundled seaborn style sheets to "seaborn-v0_8-*";
# the old names were deprecated and later removed. Prefer the new name when
# this installation offers it and fall back to the legacy name otherwise, so
# the notebook runs on both older and newer Matplotlib versions.
style_name = "seaborn-v0_8-whitegrid"
if style_name not in plt.style.available:
    style_name = "seaborn-whitegrid"
plt.style.use(style_name)
Prototyping plot with Pandas
# Quick horizontal bar chart of sales per customer via the pandas plotting
# accessor; the trailing semicolon suppresses the cell's text output.
df.plot.barh(x="name", y="sales", legend=None);
Customising plot combining fast Pandas plotting with Matplotlib object-oriented API
def xlim(x):
    """Return padded axis limits derived from the maximum of ``x``.

    Parameters
    ----------
    x
        Object exposing a ``max()`` method (in this file, a DataFrame
        column).

    Returns
    -------
    numpy.ndarray
        Two values: ``-0.05 * x.max()`` as the lower limit and
        ``1.3 * x.max()`` as the upper limit.
    """
    lower_factor, upper_factor = -0.05, 1.3
    peak = x.max()
    return peak * np.array([lower_factor, upper_factor])
# Create the Figure and Axes explicitly so the pandas-drawn chart can be
# customised afterwards through the Axes object.
fig, ax = plt.subplots()
df.plot.barh(x="name", y="sales", legend=None, ax=ax)

# Title, axis labels and padded x-limits in one call; the trailing semicolon
# suppresses the cell's text output.
ax.set(
    title="Top customers 2014",
    xlabel="Sales",
    ylabel="Customer",
    xlim=xlim(df["sales"]),
);
Formatting currency values using custom formatter
def currency(x, pos):
    """Format the amount ``x`` in thousands with one decimal and a "K" suffix.

    ``pos`` is accepted but not used; the two-argument signature lets the
    function be passed to ``set_major_formatter`` as a tick formatter.
    """
    thousands = x * 1e-3
    return "{:1.1f}K".format(thousands)
# Use `currency` to render the x-axis major tick labels.
ax.xaxis.set_major_formatter(currency)
# Bare `fig` as the last expression so the notebook shows the updated figure.
fig
Adding a line for average sales
# Mark the mean of the plotted sales with a dotted green vertical line.
sales_mean = df["sales"].mean()
ax.axvline(x=sales_mean, color="green", linestyle=":")

# Label the line, reusing the tick formatter for the amount; the text sits 5%
# to the right of the line at y=0.
lab = f"Mean: {currency(sales_mean, 0)}"
ax.text(1.05 * sales_mean, 0, lab, color="green")
fig
Identify new customers
# y-positions of the bars to annotate as new customers.
new_customer_rows = (2, 4, 5)
for customer in new_customer_rows:
    # Same horizontal offset as the mean label: 5% to the right of the mean.
    ax.text(1.05 * sales_mean, customer, "New customer")
fig
Show sales and number of purchases, xkcd-themed (just because...)
# Two-panel summary (sales on the left, purchase counts on the right) drawn
# inside the xkcd style context. The panels share the y-axis, so customer
# names appear only once.
with plt.xkcd():
    fig, (ax0, ax1) = plt.subplots(1, 2, figsize=(8, 4), sharey=True)

    # Left panel: sales per customer with a dotted line at the mean.
    df.plot(kind="barh", x="name", y="sales", legend=None, ax=ax0)
    sales_mean = df.sales.mean()
    ax0.axvline(sales_mean, color="green", linestyle=":")
    lab = f"Mean: {currency(sales_mean, 0)}"
    ax0.text(1.05 * sales_mean, 0, lab, color="green")
    for customer in [2, 4, 5]:
        ax0.text(sales_mean, customer, "New customer")
    ax0.xaxis.set_major_formatter(currency)
    ax0.set(xlim=xlim(df.sales), ylabel="Customer", title="Sales")

    # Right panel: number of purchases per customer with its mean.
    df.plot(kind="barh", x="name", y="purchases", legend=None, ax=ax1)
    purch_mean = df.purchases.mean()
    ax1.axvline(purch_mean, color="green", linestyle=":")
    # One decimal keeps the label short even when the mean is not a round
    # number.
    ax1.text(purch_mean, 0, f"Mean: {purch_mean:.1f}", color="green")
    ax1.set(title="Purchases", xlim=xlim(df.purchases))

    # The transactions are from 2014 (matching the "Top customers 2014" title
    # used for the single-panel chart), so the figure title says 2014.
    fig.suptitle(
        "Sales and purchases for top 10 customers in 2014",
        fontsize=18,
        fontweight="bold",
        y=1.05,
    )