"""
MCMC plotting methods.
"""
__all__ = ["plot_all", "plot_corr", "plot_corrmatrix", "plot_trace", "plot_logp", "format_vars"]
import math
from typing import Optional, List
import numpy as np
from scipy.stats import gaussian_kde
from . import corrplot, varplot
from .stats import format_vars, format_uncertainty, save_vars, var_stats
from .state import MCMCDraw, Draw
CORRPLOT_MAXVAR = 25 # maximum number of variables to plot in correlation matrix
TRACE_WSPACE = 0.15
TRACE_HSPACE = 0.1
# TODO: plot_all does not allow us to specify variables or their ranges
# TODO: plot_all not tested with revised handling of derived parameters
# TODO: remove hack allowing plot_all to accept a state.draw as well as a state object
[docs]
def plot_all(state: MCMCDraw | Draw, portion: Optional[float] = None, figfile=None):
# Print/save uncertainty report before loading matplotlib or creating plots
if isinstance(state, MCMCDraw):
draw = state.draw(portion=portion)
else:
draw = state
state = draw.state
all_vstats = var_stats(draw)
print(format_vars(all_vstats))
print(
f"\nStatistics and plots based on {len(draw.points)} samples ({int(100 * draw.portion)}% of total samples drawn)"
)
if figfile is not None:
save_vars(all_vstats, figfile + "-err.json")
import matplotlib.pyplot as plt
figext = "." + plt.rcParams.get("savefig.format", "png")
# Use finer binning with more samples. For 1% bin variation p,
# points per bin k = (100/p)**2 = 10000, and nbins = N // k.
nbins = max(min(draw.points.shape[0] // 10000, 400), 30)
# histograms
plt.figure(figsize=varplot.var_plot_size(len(all_vstats)))
varplot.plot_vars(draw, all_vstats, nbins=nbins)
if draw.title:
plt.suptitle(draw.title, x=0, y=1, va="top", ha="left")
if figfile is not None:
plt.savefig(figfile + "-vars" + figext)
# parameter traces
plt.figure()
plot_traces(draw.state, portion=draw.portion, vars=draw.vars)
plt.suptitle("Parameter history" + (" for " + draw.title if draw.title else ""))
if figfile is not None:
plt.savefig(figfile + "-trace" + figext)
# Acceptance rate
if False:
plt.figure()
plot_acceptance_rate(draw.state, portion=draw.portion)
if figfile is not None:
plt.savefig(figfile + "-acceptance" + figext)
# convergence plot
plt.figure()
plot_logp(draw.state, portion=draw.portion)
if draw.title:
plt.suptitle(draw.title)
if figfile is not None:
plt.savefig(figfile + "-logp" + figext)
# correlation plot
if draw.Nvar <= CORRPLOT_MAXVAR:
plt.figure()
plot_corrmatrix(draw, nbins=nbins)
if draw.title:
plt.suptitle(draw.title)
if figfile is not None:
plt.savefig(figfile + "-corr" + figext)
# parallel coordinates plot
if draw.Nvar > 1:
from . import parcoord
plt.figure()
parcoord.plot(draw, control_var=0)
if draw.title:
plt.suptitle(draw.title)
if figfile is not None:
plt.savefig(figfile + "-parcor" + figext)
[docs]
def plot_corrmatrix(draw, nbins=50, vstats=None, full=False, fig=None):
ranges = None if vstats is None or full else [v.p95_range for v in vstats]
c = corrplot.Corr2d(draw.points.T, bins=nbins, ranges=ranges, labels=draw.labels)
c.plot(fig=fig)
# print "Correlation matrix\n",c.R()
class KDE1D(gaussian_kde):
covariance_factor = lambda self: 2 * self.silverman_factor()
class KDE2D(gaussian_kde):
covariance_factor = gaussian_kde.silverman_factor
def __init__(self, dataset):
gaussian_kde.__init__(self, dataset.T)
def evalxy(self, x, y):
grid_x, grid_y = np.meshgrid(x, y)
dxy = self.evaluate(np.vstack([grid_x.flatten(), grid_y.flatten()]))
return dxy.reshape(grid_x.shape)
__call__ = evalxy
[docs]
def plot_corr(draw, vars=(0, 1)):
"""
Plot kernel density estimate of the parameter correlation.
*vars* is the pair of parameters to plot (from *draw.labels*, 0-origin).
"""
import matplotlib.pyplot as plt
_, _ = vars # Make sure vars is length 2
labels = [draw.labels[v] for v in vars]
values = [draw.points[:, v] for v in vars]
# Form kernel density estimates of the parameters
xmin, xmax = min(values[0]), max(values[0])
density_x = KDE1D(values[0])
x = np.linspace(xmin, xmax, 100)
px = density_x(x)
density_y = KDE1D(values[1])
ymin, ymax = min(values[1]), max(values[1])
y = np.linspace(ymin, ymax, 100)
py = density_y(y)
nbins = 50
ax_data = plt.axes([0.1, 0.1, 0.63, 0.63]) # x,y,w,h
density_xy = KDE2D(values[vars])
dxy = density_xy(x, y) * draw.points.shape[0]
ax_data.pcolorfast(x, y, dxy, cmap=plt.cm.gist_earth_r) # @UndefinedVariable
ax_data.plot(values[0], values[1], "k.", markersize=1)
ax_data.set_xlabel(labels[0])
ax_data.set_ylabel(labels[1])
ax_hist_x = plt.axes([0.1, 0.75, 0.63, 0.2], sharex=ax_data)
ax_hist_x.hist(values[0], nbins, orientation="vertical", density=1)
ax_hist_x.plot(x, px, "k-")
ax_hist_x.yaxis.set_major_locator(plt.MaxNLocator(4, prune="both"))
plt.setp(
ax_hist_x.get_xticklabels(),
visible=False,
)
ax_hist_y = plt.axes([0.75, 0.1, 0.2, 0.63], sharey=ax_data)
ax_hist_y.hist(values[1], nbins, orientation="horizontal", density=1)
ax_hist_y.plot(py, y, "k-")
ax_hist_y.xaxis.set_major_locator(plt.MaxNLocator(4, prune="both"))
plt.setp(ax_hist_y.get_yticklabels(), visible=False)
def plot_traces(
state: MCMCDraw, vars: Optional[List[int]] = None, portion: Optional[float] = None, fig=None, max_traces=6
):
import matplotlib.pyplot as plt
# TODO: Consider using masked chains in draw for traces rather than original
# Can only plot traces for on the original state. Ignore any variables that are out of range.
if vars:
vars = [v for v in vars if v < state.Nvar]
if fig is None:
fig = plt.gcf()
if vars is None:
vars = list(range(min(state.Nvar, max_traces)))
plt.clf()
nw, nh = tile_axes(len(vars), fig=fig)
fig.subplots_adjust(hspace=TRACE_HSPACE, wspace=TRACE_WSPACE)
for k, var in enumerate(vars):
axes = fig.add_subplot(nw, nh, k + 1)
plot_trace(state, var=var, portion=portion, axes=axes)
axes.set_yticklabels([])
# TODO: label traces on the plots to save space between plots
# label = state.labels[var]
# axes.text(0.93, 0.93, label, ha="right", va="top", transform=axes.transAxes, backgroundcolor=(1.0, 1.0, 1.0, 0.75))
# axes.set_ylabel("")
row = k // nh
if row != nw - 1:
# print(f"row {row} of {nw} x {nh}")
axes.set_xticklabels([])
axes.set_xlabel("")
[docs]
def plot_trace(state: MCMCDraw, var: int = 0, portion: Optional[float] = None, axes=None, fig=None):
import matplotlib.pyplot as plt
if axes is None:
if fig is None:
fig = plt.gcf()
axes = fig.add_subplot(111)
# TODO: We could reuse the same state.traces outputs for all traces.
generations, chains = state.traces(portion=portion, thin=1, outliers=False)
label = state.labels[var]
axes.clear()
axes.plot(generations, chains[:, :, var])
axes.set_xlabel("Generation number")
axes.set_ylabel(label)
[docs]
def plot_logp(state: MCMCDraw, portion: Optional[float] = None, outliers: bool = False):
from matplotlib.ticker import NullFormatter
import matplotlib.pyplot as plt
from scipy.stats import chi2
# Plot log likelihoods, stripping outliers
genid, logp = state.gen_logp(portion=portion, outliers=outliers)
width, height, margin, delta = 0.7, 0.75, 0.1, 0.01
trace = plt.axes([margin, 0.1, width, height])
trace.plot(genid, logp, ",", markersize=1)
trace.set_xlabel("Generation number")
trace.set_ylabel("Log likelihood at x[k]")
plt.title("Log Likelihood History")
# Plot log likelihood trend line
from bumps.wsolve import wpolyfit
x = genid
y = np.mean(logp, axis=1)
dy = np.std(logp, axis=1, ddof=1)
p = wpolyfit(x, y, dy=dy, degree=1)
px, dpx = p.ci(x, 1.0)
label = f"slope={format_uncertainty(p.coeff[0], p.std[0])}"
trace.plot(x, px, "k-", x, px + dpx, "k-.", x, px - dpx, "k-.")
trace.text(0.02, 0.02, label, transform=trace.transAxes, va="bottom", ha="left")
# Plot long likelihood histogram
data = logp.flatten()
data = data[np.isfinite(data)]
hist = plt.axes([margin + width + delta, 0.1, 1 - 2 * margin - width - delta, height])
hist.hist(data, bins=40, orientation="horizontal", density=True)
hist.set_ylim(trace.get_ylim())
null_formatter = NullFormatter()
hist.xaxis.set_major_formatter(null_formatter)
hist.yaxis.set_major_formatter(null_formatter)
# Plot chisq fit to log likelihood histogram
float_df, loc, scale = chi2.fit(-data, f0=state.Nvar)
df = int(float_df + 0.5)
# from scipy.stats import kstest
# pval = kstest(-data, lambda x: chi2.cdf(x, df, loc, scale))
# with open("/tmp/chi", "a") as fd:
# print("chi2 pars for llf", float_df, loc, scale, pval, file=fd)
xmin, xmax = trace.get_ylim()
x = np.linspace(xmin, xmax, 200)
hist.plot(chi2.pdf(-x, df, loc, scale), x, "r")
def tile_axes(n, size=None, fig=None):
"""
Creates a tile for the axes which covers as much area of the graph as
possible while keeping the plot shape near the golden ratio.
"""
import matplotlib.pyplot as plt
if size is None:
if fig is None:
fig = plt.gcf()
size = fig.get_size_inches()
figwidth, figheight = size
# Golden ratio phi is the preferred dimension
# phi = sqrt(5)/2
#
# nw, nh is the number of tiles across and down respectively
# w, h are the sizes of the tiles
#
# w,h = figwidth/nw, figheight/nh
#
# To achieve the golden ratio, set w/h to phi:
# w/h = phi => figwidth/figheight*nh/nw = phi
# => nh/nw = phi * figheight/figwidth
# Must have enough tiles:
# nh*nw > n => nw > n/nh
# => nh**2 > n * phi * figheight/figwidth
# => nh = floor(sqrt(n*phi*figheight/figwidth))
# => nw = ceil(n/nh)
phi = math.sqrt(5) / 2
nh = int(math.floor(math.sqrt(n * phi * figheight / figwidth)))
if nh < 1:
nh = 1
nw = int(math.ceil(n / nh))
return nw, nh
def plot_acceptance_rate(state: MCMCDraw, portion: Optional[float] = None):
from matplotlib import pyplot as plt
genid, AR = state.acceptance_rate(portion=portion)
plt.plot(genid, AR)
plt.xlabel("Generation #")
plt.ylabel("Acceptance rate (%)")
plt.title("DREAM acceptance rate by generation")