Matplotlib
Matplotlib
Plotting using pandas and matplotlib
Plot time series data with fancy date formatting using ConciseDateFormatter, a German locale, etc.
import locale

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import pandas as pd

# Switch to the German locale (affects locale-dependent formatting such as
# the "%b" month names used below). setlocale() raises locale.Error when
# "de_DE.UTF-8" is not installed on the system.
locale.setlocale(locale.LC_ALL, "de_DE.UTF-8")

...  # placeholder: load / prepare the data here

# Placeholder frame. The code below reads the columns "value_pct" and
# "linregress"; presumably the index holds the dates (TODO confirm).
df = pd.DataFrame()

fig, ax = plt.subplots(
    nrows=1,
    ncols=1,
    # figsize=(6, 8),  # default = 6.4,4.8
    # sharex=True,
    dpi=100,
)

# plot data: points drawn above (zorder=2) the fitted line (zorder=1)
df["value_pct"].plot(ax=ax, style=".", legend=True, zorder=2)
df["linregress"].plot(ax=ax, style="-", legend=True, zorder=1)
ax.legend(["Data", "Fit"])

# plot 2 guiding lines: zero and the last data value
y_last = df["value_pct"].iloc[-1]
# y_min = df["value_pct"].min()
# y_max = df["value_pct"].max()
ax.axhline(y=0.0, color="black", linestyle="-")
ax.axhline(y=y_last, color="blue", linestyle="dotted")

# x ticks: fancy formatting using ConciseDateFormatter
x_tic_locator = mdates.AutoDateLocator(minticks=3, maxticks=7)
x_tic_formatter = mdates.ConciseDateFormatter(
    x_tic_locator,
    show_offset=True,
    offset_formats=[
        "",
        "%Y",
        "%b %Y",
        "%Y-%b-%d",
        "%Y-%b-%d",
        "%Y-%b-%d %H:%M",
    ],
)
# NOTE(review): the locator is only handed to the formatter; with the next
# line disabled, minticks/maxticks are presumably not applied to the axis
# itself - confirm whether that is intended.
# ax.xaxis.set_major_locator(x_tic_locator)
ax.xaxis.set_major_formatter(x_tic_formatter)

# y ticks: integer positions, shown as percentages without decimals
ax.yaxis.set_major_locator(mticker.MaxNLocator(integer=True))
ax.yaxis.set_major_formatter(mticker.PercentFormatter(decimals=0))

# title and axis labels; pass "" to remove a label
fig.suptitle("my Title")
# ax.set_title("my Subtitle")
ax.set_xlabel("xLabel")
ax.set_ylabel("yLabel")

# layout
# Pass True explicitly: grid() without a visibility argument only TOGGLES
# the grid, which would switch it off if rcParams["axes.grid"] is enabled.
ax.grid(True, axis="both")
fig.tight_layout()

# save and close
fig.savefig(fname="plot.png", format="png")
# plt.clf()
plt.close(fig)  # prevents figure.max_open_warning
Horizontal bar chart of the columns "plant" and "cnt"
# Horizontal bar chart: one bar per "plant" value, bar length from "cnt".
# df.plot.barh() returns the Axes it drew on, so it is configured directly
# instead of going through plt.gca().
myPlot = df.plot.barh(
    legend=False,
    x="plant",
    y="cnt",
    linewidth=2.0,
    zorder=1,
    figsize=(12, 12),
)
myPlot.invert_yaxis()
myPlot.xaxis.set_major_formatter(mticker.PercentFormatter())
# myPlot.set_ylim(0, 100)
myPlot.set_title("My Title")
myPlot.set_xlabel("")

# grid
myPlot.set_axisbelow(True)  # for grid below the bars
# Pass True explicitly: grid() without a visibility argument only TOGGLES
# the grid. Calling grid(axis="both") followed by grid(axis="x") therefore
# switched the x grid on and straight off again, leaving only the y grid.
myPlot.grid(True, axis="x")  # x grid for bar chart
# myPlot.grid(True, axis="both")  # alternative: x and y grid

plt.tight_layout()
plt.savefig(fname="out.png", format="png")
Two subplots sharing the x-axis
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import pandas as pd

# Placeholder frame. The code below reads the columns "Inzidenz",
# "Inzidenzanstieg", "Tote" and "Intensivstationsbelegung".
df = pd.DataFrame()

# initialize plot: two rows stacked vertically, sharing the x axis
fig, ax = plt.subplots(
    nrows=2,
    ncols=1,
    sharex=True,
    figsize=(6, 8),  # default = 6.4,4.8
    dpi=100,
)

# long_name is expected to be defined elsewhere
fig.suptitle(f"COVID-19 in {long_name}")  # super title
ax[0].set_title("Inzidenzwert und -anstieg", fontsize=10)
ax[1].set_title("Tote und Intensivstationsbelegung", fontsize=10)

# define colors for data: one (primary y, secondary y) pair per subplot
colors = (("blue", "red"), ("purple", "green"))

# NOTE(review): zorder orders artists within a single Axes only. The
# secondary_y areas are drawn on a separate twin Axes (right_ax), so the
# zorder values below may not put the lines above the areas - verify.

# top subplot: line on the primary y axis, area on the secondary y axis
df["Inzidenz"].plot(
    ax=ax[0],
    color=colors[0][0],
    legend=False,
    secondary_y=False,
    zorder=2,
    linewidth=2.0,
)
# important: set grid after plot of primary y data.
# The grid is enabled on the subplot explicitly: plt.grid() would act on the
# implicit "current" axes (not necessarily ax[0]) and, without a visibility
# argument, only toggles the grid.
ax[0].grid(True, axis="both")
df["Inzidenzanstieg"].plot.area(
    ax=ax[0],
    color=colors[0][1],
    legend=False,
    secondary_y=True,
    zorder=1,
)

# bottom subplot: same layout
df["Tote"].plot(
    ax=ax[1],
    color=colors[1][0],
    legend=False,
    secondary_y=False,
    zorder=2,
    linewidth=2.0,
)
ax[1].grid(True, axis="both")
df["Intensivstationsbelegung"].plot.area(
    ax=ax[1],
    color=colors[1][1],
    legend=False,
    secondary_y=True,
    zorder=1,
    linewidth=2.0,
)

# Axis layout, text and range
# remove label as date is obvious
ax[1].set_xlabel("")

# axis label
ax[0].set_ylabel("Inzidenz (7 Tage)")
ax[0].right_ax.set_ylabel("Inzidenzanstieg (7 Tage)")
ax[1].set_ylabel("Tote (7 Tage pro Millionen)")
ax[1].right_ax.set_ylabel("Intensivstationen Anteil COVID-Patienten")

# axis range: primary axes start at 0 with an automatic upper limit
ax[0].set_ylim(bottom=0)  # 0,550
ax[0].right_ax.set_ylim(0, 150)
ax[1].set_ylim(bottom=0)  # 0,250
ax[1].right_ax.set_ylim(0, 40)

# tick freq
# all are set to make charts better comparable
ax[0].yaxis.set_major_locator(ticker.MultipleLocator(50))
ax[0].right_ax.yaxis.set_major_locator(ticker.MultipleLocator(25))
ax[1].yaxis.set_major_locator(ticker.MultipleLocator(25))
ax[1].right_ax.yaxis.set_major_locator(ticker.MultipleLocator(10))

# tick format: integers on the primary axes, percent on the secondary axes
for sub_ax in ax:
    sub_ax.yaxis.set_major_formatter(ticker.FormatStrFormatter("%d"))
    sub_ax.right_ax.yaxis.set_major_formatter(
        ticker.PercentFormatter(decimals=0)
    )

# color of label and ticks: match each y axis to the color of its data
for sub_ax, (primary_color, secondary_color) in zip(ax, colors):
    sub_ax.yaxis.label.set_color(primary_color)
    sub_ax.tick_params(axis="y", colors=primary_color)
    sub_ax.right_ax.yaxis.label.set_color(secondary_color)
    sub_ax.right_ax.tick_params(axis="y", colors=secondary_color)
Area and Line
# Rename the columns to the names that are plotted below.
column_labels = {"cnt": "count", "rolling": "7-day average"}
df = df.rename(columns=column_labels)

# Filled area without outline for the raw counts (zorder=1) ...
df["count"].plot.area(ax=ax, legend=True, linewidth=0.0, zorder=1)
# ... and the rolling average as a line with the higher zorder=2.
df["7-day average"].plot.line(ax=ax, legend=True, linewidth=2.0, zorder=2)
Stacked area
# Stacked area chart of all columns of df, drawn without outlines.
df.plot.area(ax=ax, stacked=True, linewidth=0.0)
Heatmap / Colormap
import pandas as pd

# import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# plot heatmap

# V1: seaborn
# ax = sns.heatmap(df.T)
# plt.savefig("data.png", dpi=100)

# V2: matplotlib imshow, one image row per column of df
# Convert the index to matplotlib date numbers and pad the x range by half
# a step on both sides (step = distance between the first two entries, so
# at least two rows are required).
date_nums = mdates.date2num(df.index.to_pydatetime())
half_step = (date_nums[1] - date_nums[0]) / 2.0
n_columns = len(df.columns)
extent = [
    date_nums[0] - half_step,
    date_nums[-1] + half_step,
    -0.5,
    n_columns - 0.5,
]

fig, ax = plt.subplots(figsize=(9, 16), zorder=1)
# im = ax.imshow(df.T.values, extent=extent, aspect="auto")
heat_values = df.T.values * (-1)  # values are negated before color mapping
im = ax.imshow(
    heat_values,
    cmap="jet",
    interpolation="none",
    origin="lower",
    aspect="auto",
    extent=extent,
)

# y axis: one tick per column, labelled with the column name
ax.set_yticks(range(n_columns))
ax.set_yticklabels(df.columns)

# x axis: one major tick per day, formatted as "day.month."
ax.xaxis.set_major_locator(mdates.DayLocator())
# ax.xaxis.set_minor_locator(mdates.DayLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter("%d.%m."))

# fig.colorbar(im)
# plt.gca().set_axisbelow(True)  # for grid below the lines
# plt.grid(axis="both", zorder=-1)

plt.tight_layout()
ax.margins(y=0.9, x=0.9)
plt.savefig("data-heatmap.png", dpi=100)
Defaults
Default Colors
import matplotlib.colors as mcolors

# List the default "Tableau" palette: one "name <tab> hex value" per line.
print("mcolors.TABLEAU_COLORS")
for color_name, hex_value in mcolors.TABLEAU_COLORS.items():
    print(f"{color_name} \t {hex_value}")

# Only the color names
colors = mcolors.TABLEAU_COLORS.keys()
print(colors)

# Output of the loop above:
# tab:blue    #1f77b4
# tab:orange  #ff7f0e
# tab:green   #2ca02c
# tab:red     #d62728
# tab:purple  #9467bd
# tab:brown   #8c564b
# tab:pink    #e377c2
# tab:gray    #7f7f7f
# tab:olive   #bcbd22
# tab:cyan    #17becf
More stuff: secondary y-axis with filled area, colored axes, locale, figure text
import locale

import matplotlib.ticker as mtick

# using 2nd axis and filled area

# primary y axis: line plot
ax1 = df["Cases_Last_Week_Per_100000"].plot(
    color="blue",
    legend=False,
    secondary_y=False,
    zorder=2,
)
ax1.set_zorder(2)
# important: transparent background for line plot
ax1.set_facecolor("none")

# secondary y axis: filled area
ax2 = df["Cases_Last_Week_7Day_Percent"].plot.area(
    color="red",
    legend=False,
    secondary_y=True,
    zorder=1,
)
ax2.set_zorder(1)

# set axis range: primary axis starts at 0 with an automatic upper limit
ax1.set_ylim(bottom=0)
ax2.set_ylim(0, 200)

# set axis label
ax1.set_ylabel("Inzidenz (7 Tage)")
ax2.set_ylabel("Anstieg (7 Tage)")

# no label for x axis
# plt.xlabel("")
for axes in (ax1, ax2):
    axes.set_xlabel("")

# ticks as percentage
ax2.yaxis.set_major_formatter(mtick.PercentFormatter())

# axis numbers and label colors: match each y axis to its data color
for axes, axis_color in ((ax1, "blue"), (ax2, "red")):
    axes.yaxis.label.set_color(axis_color)
    axes.tick_params(axis="y", colors=axis_color)

# set locale language setting for date axis etc.
locale.setlocale(locale.LC_ALL, "de_DE.UTF-8")

# add text to bottom right
plt.gcf().text(
    1.0,
    0.0,
    s="by Torben https://entorb.net , based on RKI and DIVI data",
    fontsize=8,
    horizontalalignment="right",
    verticalalignment="bottom",
    rotation="vertical",
)
Histogram
import matplotlib.pyplot as plt

fig, ax = plt.subplots()  # type: ignore

# Histogram of the "RespTime" column with hand-picked, non-uniform bin edges.
# density=True normalizes the histogram instead of showing raw counts.
bins = [0, 1, 2, 4, 8, 15, 30]
# bins = range(0, 30, 1)  # alternative: uniform bins of width 1
hist_options = {"column": "RespTime", "ax": ax, "bins": bins, "density": True}
df.hist(**hist_options)

fig.savefig("plot-times-hist.png")
Time of Day as x axis
# dt is presumably the datetime module (import datetime as dt) - TODO confirm
# One tick every 4 hours: 00:00, 04:00, ..., 20:00
tick_times = [dt.time(hour=hour) for hour in range(0, 24, 4)]
ax.set_xticks(tick_times)

# x axis covers the whole day
day_start = dt.time(0)
day_end = dt.time(23, 59, 59)
ax.set_xlim(day_start, day_end)
Colormaps: lines in grey->black or cold->hot
import numpy as np
from matplotlib import colormaps

# num_lines (number of lines to color) is expected to be defined elsewhere.

# grey to black: grayscale levels given as strings, from 0.9 down to 0.1
colors = [str(x) for x in np.linspace(0.9, 0.1, num_lines)]

# blue to red / cool to warm
# Sample the colormap over its full range [0.0, 1.0]. Using i / num_lines
# never reaches 1.0, so the warm end of the map was never used (for two
# lines the second color was the neutral midpoint). linspace includes both
# endpoints, matching the grey variant above, and also works for
# num_lines == 1 (a single sample at 0.0).
colormap = colormaps["coolwarm"]
colors = [colormap(float(x)) for x in np.linspace(0.0, 1.0, num_lines)]