Source code for nmf_models.plotting

"""Plotting functions for intNMF."""

import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
import numpy as np
from nmf_models.nmf_models_mod_updates import intNMF
from typing import Optional, Union, Mapping  # Special
import seaborn as sns
import matplotlib as mpl
import sys
import pandas as pd

sys.setrecursionlimit(100000)

plt.rcParams["figure.figsize"] = [7.00, 3.50]
plt.rcParams["figure.autolayout"] = True
mpl.rc('image', cmap='cividis')
sns.set_palette(plt.cm.cividis(np.linspace(0, 1, 8))[1:-1])


def _view_mat(mat, xlab, ax, ylab=None):
    """Base matrix plotter. Saturates at 1

        Parameters
        ----------
        mat : array-like
            Matrix to plot
        xlab : str
            x axis label
        ax : matplotlib axis
            axis for plot
        ylab : str
            y axis label

        Returns
        -------
        AxesImage
            imshow of matric provided
        matplotlib axes
            axes for figure

    """
    im_ratio = mat.shape[1]/mat.shape[0]
    print(im_ratio)
    im = ax.imshow(np.where(mat > 1, 1, mat), aspect=im_ratio,
                   interpolation=None)
    ax.set_xlabel(xlab)
    if ylab is not None:
        ax.set_ylabel(ylab)

    return im, ax


[docs]def view_embed(nmf_model: intNMF, clustered: Optional[bool] = False): """View the joint embeding matrix of an intNMF object. Parameters ---------- nmf_model : intNMF joint NMF object. With joint embed to plot clustered : boolean cluster cells and topics using seaborn clustermap Returns ------- matplotlib axes axes for figure """ if not clustered: fig, ax = plt.subplots() im, ax = _view_mat(nmf_model.theta, 'topics', ax, 'cells') fig.colorbar(im) fig.tight_layout() else: ax = sns.clustermap(nmf_model.theta, method="complete", vmin=0, vmax=1, yticklabels='None') return ax
[docs]def view_loadings(nmf_model: intNMF, modality: str, clustered: Optional[bool] = False): """View loadings matrix. Parameters ---------- nmf_model : intNMF joint NMF object. With loadings to plot modality : str, "both" | "rna" | "atac" choose modality to plot clustered : boolean cluster topics and features using seaborn clustermap Returns ------- matplotlib axes axes for figure. if modality == 'both' list of axes is returned """ if modality == "both": if not clustered: fig = plt.figure(1, (8, 4)) grid = ImageGrid(fig, 111, nrows_ncols=(1, 2), axes_pad=0.1, cbar_mode='single') im1, ax1 = _view_mat(nmf_model.phi_rna, 'genes', grid[0], 'topics') im2, ax2 = _view_mat(nmf_model.phi_atac, 'regions', grid[1]) cax = grid.cbar_axes[0] cb = fig.colorbar(im2, cax=cax) ax = [ax1, ax2] fig.tight_layout() else: g0 = sns.clustermap(nmf_model.phi_rna, vmin=0, vmax=1, xticklabels='None') g0.fig.suptitle('RNA') # create new gridspec for the right part g1 = sns.clustermap(nmf_model.phi_atac, vmin=0, vmax=1, xticklabels='None') g1.fig.suptitle('ATAC') ax = [g0, g1] elif modality == "atac": if not clustered: fig, ax = plt.subplots() im, ax = _view_mat(nmf_model.phi_atac, 'regions', ax, 'topics') fig.colorbar(im, ax=ax) fig.tight_layout() else: ax = sns.clustermap(nmf_model.phi_atac, method="complete", vmin=0, vmax=1, yticklabels='None') elif modality == "rna": if not clustered: fig, ax = plt.subplots() im, ax = _view_mat(nmf_model.phi_rna, 'genes', ax, 'topics') fig.colorbar(im, ax=ax) fig.tight_layout() else: ax = sns.clustermap(nmf_model.phi_rna, method="complete", vmin=0, vmax=1, yticklabels='None') return ax
[docs]def loss(nmf_model: intNMF, modality: Optional[str] = 'both'): """View loss. Parameters --------- nmf_model: intNMF intNMF model with loss to plot modality: str, 'both' | 'rna' | 'atac' plot combined or modality specific loss. Default combined Returns ------ matplotlib ax """ if modality == "both": fig, ax = plt.subplots() ax.plot(nmf_model.loss, label='total') ax.plot(nmf_model.loss_atac, '--', label='atac') ax.plot(nmf_model.loss_rna, '--', label='rna') ax.legend() if modality == "atac": fig, ax = plt.subplots() ax.plot(nmf_model.loss_atac) if modality == "rna": fig, ax = plt.subplots() ax.plot(nmf_model.loss_rna) return ax
[docs]def correlation_plot(cor_mat, clustered: Optional[bool] = False, ax=None): """view matrix correlation Parameters ---------- cor_mat : array-lile correlation matrix to plot clustered : bool heatmap or clustermap ax : matplotlib Axes, default None matplotlib ax to plot on. Returns ------- matplotlib ax if clustered False and seaborn clustergrid if clustered True """ if not clustered: if ax is not None: ax = sns.heatmap(cor_mat, cmap='RdBu_r', annot=True, annot_kws={"size": 7}, vmin=-1, vmax=1, ax=ax) else: ax = sns.heatmap(cor_mat, cmap='RdBu_r', annot=True, annot_kws={"size": 7}, vmin=-1, vmax=1) else: ax = sns.clustermap(cor_mat, method="complete", cmap='RdBu_r', annot=True, annot_kws={"size": 7}, vmin=-1, vmax=1) return ax
[docs]def correlation_embed(nmf_model: intNMF, clustered: Optional[bool] = False): """view matrix correlation of intNMF embedding Parameters ---------- nmf_model : intNMF intNMF model with embed to compare correlations for. (.fit() has been called) clustered : bool heatmap or clustermap Returns ------- matplotlib ax if clustered False and seaborn clustergrid if clustered True """ ax = correlation_plot(np.round(np.corrcoef(nmf_model.theta.T), 2), clustered) return ax
[docs]def correlation_loadings(nmf_model: intNMF, modality: Optional[str]='both', clustered: Optional[bool]=False): """view matrix correlation of intNMF loadings Parameters ---------- nmf_model : intNMF intNMF model with loadings to compare correlations for. (.fit() has been called) modality : str, 'both' | 'rna' | 'atac modality to plot loading for. Default both clustered : bool heatmap or clustermap Returns ------- matplotlib axes or list of matplotlib axis or seraborn clustergrid or seaborn Gridspec Axes for figure. If clustered = False - matplotlib objects used otherwise seaborn objects used. If modality = 'both' list of axes is returned if matplotlib and Gridspec if seaborn. """ rna_cor = np.round(np.corrcoef(nmf_model.phi_rna), 2) atac_cor = np.round(np.corrcoef(nmf_model.phi_atac), 2) if modality == "both": if not clustered: fig, axs = plt.subplots(nrows=1, ncols=2) axs[0] = correlation_plot(rna_cor, clustered, axs[0]) axs[0].set_title('RNA') axs[1] = correlation_plot(atac_cor, clustered, axs[1]) axs[1].set_title('ATAC') ax = axs fig.tight_layout() else: g0 = sns.clustermap(rna_cor, method="complete", cmap='RdBu_r', annot=True, annot_kws={"size": 7}, vmin=-1, vmax=1) g0.fig.suptitle('RNA') # create new gridspec for the right part g1 = sns.clustermap(atac_cor, method="complete", cmap='RdBu_r', annot=True, annot_kws={"size": 7}, vmin=-1, vmax=1) g1.fig.suptitle('ATAC') ax = [g0, g1] elif modality == "atac": if not clustered: fig, ax = plt.subplots() ax = correlation_plot(atac_cor, clustered) ax.set_title('ATAC loading correlation') fig.tight_layout() else: ax = sns.clustermap(atac_cor, method="complete", cmap='RdBu_r', annot=True, annot_kws={"size": 7}, vmin=-1, vmax=1) ax.fig.suptitle('ATAC loading correlation') elif modality == "rna": if not clustered: fig, ax = plt.subplots() ax = correlation_plot(rna_cor, clustered) ax.set_title('RNA loading correlation') fig.tight_layout() else: ax = sns.clustermap(rna_cor, method="complete", cmap='RdBu_r', annot=True, annot_kws={"size": 7}, vmin=-1, vmax=1) ax.fig.suptitle('RNA loading correlation') return ax
[docs]def embed_plot(nmf_model: intNMF, TX: Optional[int] = 0, TY: Optional[int] = 1, labs: Optional[list] = None, scale_x: Optional[float] = 1, scale_y: Optional[float] = 1, title: Optional[str] = None, y_lab: Optional[str] = None, x_lab: Optional[str] = None): """Plot nmf embedding. Parameters ---------- nmf_model : intNMF intNMF model with embed to view (.fit has been run) TX : int factor/topic for x-axishttp://localhost:8888 TY : int factor/topic for y-axis labs : list cell labels used to colour plot. discrete or continuos determined by fraction of unique elements scale_x : float value to divide x-axis values by scale_y : float value to divide y-axis values by title : str plot title, default None y_lab : str y-axis label, default None x_lab : str x-axis label, default None Returns ------- matplotlib ax """ fig, ax = plt.subplots() if labs is not None: # treat labs as continous if len(np.unique(labs))/len(labs) > 0.05: im = ax.scatter(nmf_model.theta[:, TX]*scale_x, nmf_model.theta[:, TY]*scale_y, c=labs) fig.colorbar(im, ax=ax) # treat labs as discrete else: lab_cmap = dict((val, ["{}".format(val), mpl.colormaps['rainbow'](i/len(np.unique(labs)))]) for i, val in enumerate(np.unique(labs))) u_lab, u_col = map(list, zip(*lab_cmap.values())) label, colour = map(list, zip(*[lab_cmap[el] for el in labs])) ax.scatter(nmf_model.theta[:, TX]*scale_x, nmf_model.theta[:, TY]*scale_y, c=colour, label=label) # produce a legend with the unique colors from the scatter handles = [plt.scatter([0,0],[0,0], color=col, marker='o') for col in u_col] ax.legend(handles, u_lab, ncol=2) else: ax.scatter(nmf_model.theta[:, TX]*scale_x, nmf_model.theta[:, TY]*scale_y) if x_lab is None: ax.set_xlabel("Factor {}".format(TX)) else: ax.set_xlabel(x_lab) if y_lab is None: ax.set_ylabel("Factor {}".format(TY)) else: ax.set_ylabel(y_lab) if title is not None: ax.set_title(title) fig.tight_layout() return ax
[docs]def factor_biplot(nmf_model: intNMF, TX: Optional[int] = 0, TY: Optional[int] = 1, n_labs: Optional[int] = 5, modality: Optional[str] = 'rna', labs: Optional[list] = None, select: Optional[list] = None, title: Optional[str] = None, x_lab: Optional[str] = None, y_lab: Optional[str] = None, mode: Optional[str] = 'abs'): """Plot nmf embedding. With features projected on. Plot is scaled so that the projected features don't exceed 1. Parameters ---------- nmf_model : intNMF intNMF model with embed to view (.fit has been run) TX : int factor/topic for x-axis TY : int factor/topic for y-axis n_labs: int number of features to plot per topic (i.e. 2) modality: str, 'rna' | 'atac' select modality from which features are projected from. labs : list cell labels used to colour plot. Discrete or continuos determined by fraction of unique elements select : list list of features to project title : str plot title, default None y_lab : str y-axis label, default None x_lab : str x-axis label, default None mode : str, 'abs' | 'diff' select features to plot by absolute value or by difference Returns ------- matplotlib ax list of features annotated """ scale_x_embed = 1.0/nmf_model.theta[:, TX].max() scale_y_embed = 1.0/nmf_model.theta[:, TY].max() ax = embed_plot(nmf_model, TX=TX, TY=TY, labs=labs, scale_x=scale_x_embed, scale_y=scale_y_embed, title=title, x_lab=x_lab, y_lab=y_lab) if modality == 'rna': phi = nmf_model.phi_rna col_names = nmf_model.rna_features elif modality == 'atac': phi = nmf_model.phi_atac col_names = nmf_model.atac_features else: print('select modality from rna or atac') return if col_names is None: print('Missing feature labels continuing anyway') phi_df = pd.DataFrame(phi, columns=col_names, index=['Topic' + str(i) for i in np.arange(nmf_model.k)]) if mode == 'abs': top_features_TX = phi_df.iloc[TX, :].sort_values(ascending=False)[0:n_labs] top_features_TY = phi_df.iloc[TY, :].sort_values(ascending=False)[0:n_labs] elif mode == 'diff': feature_diff = (phi_df.iloc[TX, :] - phi_df.iloc[TY, :]).sort_values(ascending=False) top_features_TX = feature_diff[0:n_labs] top_features_TY = feature_diff[-n_labs:] else: print('wrong mode selection') return features_to_add = np.append(top_features_TX.index.values, top_features_TY.index.values) if select is not None and set(select).issubset(phi_df.columns): features_to_add = np.append(features_to_add, select) scale_x_loading = 1/(phi_df.loc[phi_df.index[TX], features_to_add].max()) scale_y_loading = 1/(phi_df.loc[phi_df.index[TY], features_to_add].max()) for feature in features_to_add: x_coord = phi_df.loc[phi_df.index[TX], feature]*scale_x_loading y_coord = phi_df.loc[phi_df.index[TY], feature]*scale_y_loading ax.plot([0, x_coord], [0, y_coord], color='tab:orange') ax.text(x_coord - 0.05, y_coord * 1.05, feature) plt.tight_layout() return ax, features_to_add
[docs]def plot_weights_rank(nmf_model: intNMF, factor: int, n_labs: Optional[int] = 5, modality: Optional[str] = 'rna', select: Optional[list] = None, title: Optional[str] = None, ax = None): """Rank plot of the feature weights. Parameters ---------- nmf_model : intNMF intNMF model with loadings to rank (.fit has been run) factor : int factor/topic to plot feature weights for n_labs: int number of features to label modality: str, 'rna' | 'atac' select modality from which features are ranked. select : list[str] list of additional features to label title : str plot title, default None ax : matplotlib ax matplotlib ax to plot on Returns ------- matplotlib ax """ if modality == 'rna': phi = nmf_model.phi_rna col_names = nmf_model.rna_features elif modality == 'atac': phi = nmf_model.phi_atac col_names = nmf_model.atac_features else: print('select modality from rna or atac') return if col_names is None: print('Missing feature labels continuing anyway') phi_df = pd.DataFrame(phi, columns=col_names, index=['Factor {}'.format(i) for i in np.arange(nmf_model.k)]) # Plot ranks ranks = phi_df.iloc[factor, :].sort_values(ascending=False) if ax is None: fig, ax = plt.subplots() ax.scatter(np.arange(len(ranks)), ranks) # Plot annotations of top ranked genes and named ranks ranks_df = ranks.to_frame() ranks_df['rank'] = np.arange(len(ranks_df)) top_N = ranks_df.iloc[0:n_labs, :] if select is not None and set(select).issubset(phi_df.columns): print('adding genes') genes_to_add = ranks_df.loc[select, :] top_N = pd.concat([top_N, genes_to_add]) jitter = np.linspace(0.1, -1, num=len(top_N)) for i, (index, row) in enumerate(top_N.iterrows()): rank = int(row[1]) value = row[0] ax.annotate(index, xy=(rank, value), xytext=(rank + (len(ranks)*0.1), np.abs(value + jitter[i])), arrowprops=dict(facecolor='black', shrink=0.05, width=0.5, headwidth=1)) ax.set_xlabel('Feature rank') ax.set_ylabel('Factor loading') if title is not None: ax.set_title(title) return ax
[docs]def plot_weights_scatter(nmf_model: intNMF, factors: tuple, n_labs: Optional[int] = 5, modality: Optional[str] = 'rna', select: Optional[list] = None, title: Optional[str] = None): """Rank plot of the feature weights. Parameters ---------- nmf_model : intNMF intNMF model with loadings to plot (.fit has been run) factors : tuple(int, int) factor/topics to plot feature weights for n_labs: int number of features to label per topic/factor modality: str, 'rna' | 'atac' select modality from which features are plotted. select : list[str] list of additional features to label title : str plot title, default None Returns ------- matplotlib ax """ TX = factors[0] TY = factors[1] if modality == 'rna': phi = nmf_model.phi_rna col_names = nmf_model.rna_features elif modality == 'atac': phi = nmf_model.phi_atac col_names = nmf_model.atac_features else: print('select modality from rna or atac') return if col_names is None: print('Missing feature labels continuing anyway') phi_df = pd.DataFrame(phi, columns=col_names, index=['Factor {}'.format(i) for i in np.arange(nmf_model.k)]) fig, ax = plt.subplots() ax.scatter(phi_df.loc[phi_df.index[TX], :], phi_df.loc[phi_df.index[TY], :]) top_features_TX = phi_df.iloc[TX, :].sort_values(ascending=False)[0:n_labs].sort_values(ascending=True) top_features_TY = phi_df.iloc[TY, :].sort_values(ascending=False)[0:n_labs] features_to_add = np.append(top_features_TY.index.values, top_features_TX.index.values) if select is not None and set(select).issubset(phi_df.columns): features_to_add = np.append(features_to_add, select) jitter = np.linspace(0, np.pi, num=len(features_to_add)) for i, feature in enumerate(features_to_add): x_coord = phi_df.loc[phi_df.index[TX], feature] y_coord = phi_df.loc[phi_df.index[TY], feature] ax.annotate(feature, xy=(x_coord, y_coord), xytext=(x_coord + 0.2*np.sin(jitter[i]), y_coord + 0.2*np.cos(jitter[i])), arrowprops=dict(facecolor='black', shrink=0.05, width=0.5, headwidth=1)) ax.set_xlabel(phi_df.index[TX]) ax.set_ylabel(phi_df.index[TY]) return ax
[docs]def plot_weights_bar(nmf_model: intNMF, genes: list, factors: Optional[list] = None, modality: Optional[str] = 'rna', title: Optional[str] = None): """Plot stacked bar chart. With features weights plotted per feature and coloured the factor. Parameters ---------- nmf_model : intNMF intNMF model with loadings to plot (.fit has been run) genes : list[str] list of features whose weights are plooted. (used to determine the number of bars) factors : list[int] factor/topics to plot feature weights. (used to color sections of the bar) modality: str, 'rna' | 'atac' select modality from which features are plotted. title : str plot title, default None Returns ------- matplotlib ax """ if modality == 'rna': phi = nmf_model.phi_rna col_names = nmf_model.rna_features elif modality == 'atac': phi = nmf_model.phi_atac col_names = nmf_model.atac_features else: print('select modality from rna or atac') return if col_names is None: print('Missing feature labels continuing anyway') phi_df = pd.DataFrame(phi, columns=col_names, index=['Factor {}'.format(i) for i in np.arange(nmf_model.k)]) if not set(genes).issubset(phi_df.columns): print('features given not in features') return if factors is None: factors = np.arange(nmf_model.k) plot_df = phi_df.loc[phi_df.index[factors], genes] if plot_df.shape[0] > 10: lab_cmap = dict((val, mpl.colormaps['rainbow'](i/plot_df.nmf_models.nmf_modelsshape[0])) for i, val in enumerate(plot_df.index)) else: lab_cmap = dict((val, plt.get_cmap('tab10')(i)) for i, val in enumerate(plot_df.index)) fig, ax = plt.subplots() bottom = np.zeros(len(genes)) for index, row in plot_df.iterrows(): ax.bar(row.index, row.values, 0.5, label=index, bottom=bottom, color=lab_cmap[index]) bottom+=row.values ax.legend(ncol=2) ax.tick_params(axis='x', labelrotation=45) return ax