Matplotlib: Difference between revisions

From Torben's Wiki
mNo edit summary
 
mNo edit summary
 
(One intermediate revision by the same user not shown)
Line 1: Line 1:
[[Category:Coding]][[Category:Python]]
[[Category:Coding]][[Category:Python]]
==Matplotlib==
==Matplotlib==
===Simple Pandas DataFrame Plotting===
plt = df["watt"].plot()
fig = plt.get_figure()
fig.tight_layout()
fig.savefig("plot.png")


===Plotting using pandas and matplotlib===
===Plotting using pandas and matplotlib===
Line 26: Line 32:
  # plot data
  # plot data
  df["value_pct"].plot(ax=ax, style=".", legend=True, zorder=2)
  df["value_pct"].plot(ax=ax, style=".", legend=True, zorder=2)
# plot mean value in the same color as the last line
mean_value = df["value_pct"].mean()
last_line_color = ax.get_lines()[-1].get_color()
plt.axhline(mean_value, color=last_line_color, linestyle="--", linewidth=2)
  df["linregress"].plot(ax=ax, style="-", legend=True, zorder=1)
  df["linregress"].plot(ax=ax, style="-", legend=True, zorder=1)
  plt.legend(["Data", "Fit"])
  plt.legend(["Data", "Fit"])

Latest revision as of 22:23, 16 November 2024

Matplotlib

Simple Pandas DataFrame Plotting

# Series.plot() returns the matplotlib Axes it drew on. Keep it in "ax" so that
# the conventional "plt" alias of matplotlib.pyplot is not shadowed.
ax = df["watt"].plot()
fig = ax.get_figure()
fig.tight_layout()
fig.savefig("plot.png")

Plotting using pandas and matplotlib

Plot time series data with fancy formatting using ConciseDateFormatter, German locale, etc.

import locale

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import pandas as pd

# switch to the German locale before any date formatting happens
locale.setlocale(locale.LC_ALL, "de_DE.UTF-8")

...  # placeholder: read or build the data here
df = pd.DataFrame()

# a single subplot at 100 dpi
# optional arguments: figsize=(6, 8) (default is 6.4 x 4.8), sharex=True
fig, ax = plt.subplots(nrows=1, ncols=1, dpi=100)

# plot data (dots, drawn above the fit line)
df["value_pct"].plot(ax=ax, style=".", legend=True, zorder=2)
data_line = ax.get_lines()[-1]  # the line that was just added

# plot mean value as a dashed horizontal line in the same color as the data
mean_value = df["value_pct"].mean()
ax.axhline(mean_value, color=data_line.get_color(), linestyle="--", linewidth=2)

# plot the linear regression (solid line, drawn below the data)
df["linregress"].plot(ax=ax, style="-", legend=True, zorder=1)
fit_line = ax.get_lines()[-1]

# Pass the handles explicitly: a labels-only legend call hands out the labels
# in drawing order, which would attach "Fit" to the mean line drawn in between.
ax.legend(handles=[data_line, fit_line], labels=["Data", "Fit"])

# two guiding lines: the zero level and the most recent value
# (df["value_pct"].min() / .max() are candidates for further levels)
y_last = df["value_pct"].iloc[-1]
ax.axhline(y=0.0, color="black", linestyle="-")
ax.axhline(y=y_last, color="blue", linestyle="dotted")

# x ticks: compact date labels via ConciseDateFormatter
x_tic_locator = mdates.AutoDateLocator(minticks=3, maxticks=7)
x_tic_formatter = mdates.ConciseDateFormatter(
    x_tic_locator,
    show_offset=True,
    offset_formats=["", "%Y", "%b %Y", "%Y-%b-%d", "%Y-%b-%d", "%Y-%b-%d %H:%M"],
)
# the locator itself is not installed on the axis, only the formatter:
# ax.xaxis.set_major_locator(x_tic_locator)
ax.xaxis.set_major_formatter(x_tic_formatter)

# y ticks: integer positions, shown as percentages without decimals
ax.yaxis.set_major_locator(mticker.MaxNLocator(integer=True))
ax.yaxis.set_major_formatter(mticker.PercentFormatter(decimals=0))

# figure title and axis labels (pass "" to remove a label)
fig.suptitle("my Title")
# ax.set_title("my Subtitle")
ax.set_xlabel("xLabel")
ax.set_ylabel("yLabel")

# layout
ax.grid(axis="both")
fig.tight_layout()

# save and close
fig.savefig(fname="plot.png", format="png")
# plt.clf()
plt.close()  # prevents figure.max_open_warning


Horizontal bar chart of the columns "plant" and "cnt"

# one horizontal bar per "plant"; the bar length is the "cnt" value
ax = df.plot.barh(
    legend=False, x="plant", y="cnt", linewidth=2.0, zorder=1, figsize=(12, 12)
)
ax.invert_yaxis()  # first row at the top

# The values run along the x axis of a horizontal bar chart, so both the
# percent formatting and the 0..100 range belong to x. Setting the y limits
# here would rescale the category axis and undo the inversion above.
ax.xaxis.set_major_formatter(mticker.PercentFormatter())
ax.set_xlim(0, 100)

ax.set_title("My Title")
ax.set_xlabel("")

# grid below the bars
ax.set_axisbelow(True)
# Switch the grid on explicitly: grid() without a visibility argument toggles
# the current state, so two consecutive calls cancel each other out.
# For a grid in both directions use ax.grid(True, axis="both") instead.
ax.grid(True, axis="x")

plt.tight_layout()
plt.savefig(fname="out.png", format="png")

2 Subplots sharing xaxis

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import pandas as pd

df = pd.DataFrame()
# initialize plot: two subplots stacked vertically that share the x axis
fig, ax = plt.subplots(
    nrows=2, ncols=1, sharex=True, figsize=(6, 8), dpi=100  # figsize default = 6.4,4.8
)
# NOTE(review): long_name is not defined in this snippet; it must be set beforehand
fig.suptitle(f"COVID-19 in {long_name}")  # super title
ax[0].set_title("Inzidenzwert und -anstieg", fontsize=10)
ax[1].set_title("Tote und Intensivstationsbelegung", fontsize=10)

# define colors for data: colors[subplot][0] is used for the primary y axis,
# colors[subplot][1] for the secondary y axis
colors = (("blue", "red"), ("purple", "green"))

# plot the data of a pandas dataframe
# upper subplot, primary y axis: line drawn above the area (zorder=2)
df["Inzidenz"].plot(
    ax=ax[0],
    color=colors[0][0],
    legend=False,
    secondary_y=False,
    zorder=2,
    linewidth=2.0,
)

# important: set grid after plot of primary y data
# NOTE(review): plt.grid() acts on pyplot's current axes; confirm that this is
# the subplot that is supposed to get the grid
plt.grid(axis="both")

# upper subplot, secondary y axis: filled area drawn below the line (zorder=1)
df["Inzidenzanstieg"].plot.area(
    ax=ax[0], color=colors[0][1], legend=False, secondary_y=True, zorder=1
)
# lower subplot, primary y axis
df["Tote"].plot(
    ax=ax[1],
    color=colors[1][0],
    legend=False,
    secondary_y=False,
    zorder=2,
    linewidth=2.0,
)
# lower subplot, secondary y axis
df["Intensivstationsbelegung"].plot.area(
    ax=ax[1],
    color=colors[1][1],
    legend=False,
    secondary_y=True,
    zorder=1,
    linewidth=2.0,
)

# Axis layout, text and range
# remove label as date is obvious
ax[1].set_xlabel("")

# subplot titles (same calls as directly after plt.subplots above)
ax[0].set_title("Inzidenzwert und -anstieg", fontsize=10)
ax[1].set_title("Tote und Intensivstationsbelegung", fontsize=10)
# axis label; right_ax is the secondary y axis of the secondary_y=True plots
ax[0].set_ylabel("Inzidenz (7 Tage)")
ax[0].right_ax.set_ylabel("Inzidenzanstieg (7 Tage)")
ax[1].set_ylabel("Tote (7 Tage pro Millionen)")
ax[1].right_ax.set_ylabel("Intensivstationen Anteil COVID-Patienten")
# axis range: primary axes only get a lower limit, secondary axes a fixed range
ax[0].set_ylim(
    0,
)  # 0,550
ax[0].right_ax.set_ylim(0, 150)
ax[1].set_ylim(
    0,
)  # 0,250
ax[1].right_ax.set_ylim(0, 40)
# tick freq
# all are set to make charts better comparable
ax[0].yaxis.set_major_locator(ticker.MultipleLocator(50))
ax[0].right_ax.yaxis.set_major_locator(ticker.MultipleLocator(25))
ax[1].yaxis.set_major_locator(ticker.MultipleLocator(25))
ax[1].right_ax.yaxis.set_major_locator(ticker.MultipleLocator(10))
# tick format: integers on the primary axes, percentages on the secondary axes
ax[0].yaxis.set_major_formatter(ticker.FormatStrFormatter("%d"))
ax[0].right_ax.yaxis.set_major_formatter(ticker.PercentFormatter(decimals=0))
ax[1].yaxis.set_major_formatter(ticker.FormatStrFormatter("%d"))
ax[1].right_ax.yaxis.set_major_formatter(ticker.PercentFormatter(decimals=0))
# color of label and ticks: each y axis takes the color of the data plotted on it
ax[0].yaxis.label.set_color(colors[0][0])
ax[0].tick_params(axis="y", colors=colors[0][0])
ax[0].right_ax.yaxis.label.set_color(colors[0][1])
ax[0].right_ax.tick_params(axis="y", colors=colors[0][1])
ax[1].yaxis.label.set_color(colors[1][0])
ax[1].tick_params(axis="y", colors=colors[1][0])
ax[1].right_ax.yaxis.label.set_color(colors[1][1])
ax[1].right_ax.tick_params(axis="y", colors=colors[1][1])

Area and Line

# rename first: with legend=True the column names show up as legend entries
df = df.rename(columns={"cnt": "count", "rolling": "7-day average"})
# filled area without outline in the background ...
df["count"].plot.area(ax=ax, legend=True, linewidth=0.0, zorder=1)
# ... and the average as a line on top of it
df["7-day average"].plot.line(ax=ax, legend=True, linewidth=2.0, zorder=2)

Stacked area

# all columns stacked on top of each other as filled areas without outline
df.plot.area(ax=ax, stacked=True, linewidth=0.0)

Heatmap / Colormap

import pandas as pd
# import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# plot heatmap
# Variant 1: seaborn
# ax = sns.heatmap(df.T)
# plt.savefig("data.png", dpi=100)

# Variant 2: matplotlib imshow
# Convert the index to matplotlib date numbers and extend the x range by half
# a step on both sides; the step is taken from the first two rows, so this
# assumes an equidistant index with at least two entries.
dnum = mdates.date2num(df.index.to_pydatetime())
half_step = (dnum[1] - dnum[0]) / 2.0
start = dnum[0] - half_step
stop = dnum[-1] + half_step
extent = [start, stop, -0.5, len(df.columns) - 0.5]

fig, ax = plt.subplots(figsize=(9, 16), zorder=1)
# minimal form: im = ax.imshow(df.T.values, extent=extent, aspect="auto")
im = ax.imshow(
    df.T.values * -1,  # sign flipped before the values are mapped to colors
    extent=extent,
    origin="lower",
    aspect="auto",
    interpolation="none",
    cmap="jet",
)

# one y tick per column, labeled with the column name
ax.set_yticks(range(len(df.columns)))
ax.set_yticklabels(df.columns)

# one x tick per day, formatted as "day.month."
ax.xaxis.set_major_locator(mdates.DayLocator())
# ax.xaxis.set_minor_locator(mdates.DayLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter("%d.%m."))

# optional: color bar, grid below the data
# fig.colorbar(im)
# plt.gca().set_axisbelow(True)
# plt.grid(axis="both", zorder=-1)
plt.tight_layout()
ax.margins(y=0.9, x=0.9)

plt.savefig("data-heatmap.png", dpi=100)

Defaults

Default Colors

import matplotlib.colors as mcolors

# list the default "tab:" colors as name / hex value pairs
print("mcolors.TABLEAU_COLORS")
for name, value in mcolors.TABLEAU_COLORS.items():
    print(f"{name} \t {value}")

# the color names on their own
colors = mcolors.TABLEAU_COLORS.keys()
print(colors)

tab:blue         #1f77b4
tab:orange       #ff7f0e
tab:green        #2ca02c
tab:red          #d62728
tab:purple       #9467bd
tab:brown        #8c564b
tab:pink         #e377c2
tab:gray         #7f7f7f
tab:olive        #bcbd22
tab:cyan         #17becf

more stuff

import locale

import matplotlib.ticker as mtick

# using 2nd axis and filled area
# line on the primary y axis, stacked above the area
ax1 = df.Cases_Last_Week_Per_100000.plot(
    color="blue", legend=False, secondary_y=False, zorder=2
)
ax1.set_zorder(2)
# important: transparent background for line plot
ax1.set_facecolor("none")
# filled area on the secondary y axis, stacked below the line
ax2 = df.Cases_Last_Week_7Day_Percent.plot.area(
    color="red", legend=False, secondary_y=True, zorder=1
)
ax2.set_zorder(1)

# set axis range: lower limit only for the line, fixed range for the area
ax1.set_ylim(0)
ax2.set_ylim(0, 200)

# set axis label
ax1.set_ylabel("Inzidenz (7 Tage)")
ax2.set_ylabel("Anstieg (7 Tage)")
# no label for x axis (alternative: plt.xlabel(""))
for axes in (ax1, ax2):
    axes.set_xlabel("")

# ticks as percentage
ax2.yaxis.set_major_formatter(mtick.PercentFormatter())

# axis numbers and label colors match the data plotted on each axis
for axes, color in ((ax1, "blue"), (ax2, "red")):
    axes.yaxis.label.set_color(color)
    axes.tick_params(axis="y", colors=color)

# set locale language setting for date axis etc.
locale.setlocale(locale.LC_ALL, "de_DE.UTF-8")

# add vertical text to the bottom right corner of the figure
plt.gcf().text(
    1.0,
    0.0,
    s="by Torben https://entorb.net , based on RKI and DIVI data",
    fontsize=8,
    horizontalalignment="right",
    verticalalignment="bottom",
    rotation="vertical",
)

Histogram

import matplotlib.pyplot as plt
# bin edges of increasing width; for uniform bins use range(0, 30, 1)
bins = [0, 1, 2, 4, 8, 15, 30]
fig, ax = plt.subplots()  # type: ignore
# histogram of the "RespTime" column, normalized (density=True)
df.hist(column="RespTime", ax=ax, bins=bins, density=True)
fig.savefig("plot-times-hist.png")

Time of Day as x axis

# one tick every four hours: 00:00, 04:00, ..., 20:00
ax.set_xticks([dt.time(hour=h) for h in range(0, 24, 4)])
# show the whole day
ax.set_xlim(dt.time(0), dt.time(23, 59, 59))

Colormaps: lines in grey->black or cold->hot

import numpy as np
from matplotlib import colormaps

# grey to black: one grey level per line, given as a float string from
# "0.9" (light) down to "0.1" (dark)
colors = [str(x) for x in np.linspace(0.9, 0.1, num_lines)]

# blue to red / cool to warm
colormap = colormaps["coolwarm"]
# Sample the colormap from 0.0 to 1.0 inclusive so that the last line gets the
# warmest color; i / num_lines stops short of 1.0 and never reaches that end.
colors = [colormap(x) for x in np.linspace(0.0, 1.0, num_lines)]