MCMC

In order to run this notebook yourself, you will need the dataset located here: - Go to https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE100099

  • Download the file GSE100099_RNASeqGEO.tsv.gz

[1]:
import tensorflow as tf

from timeit import default_timer as timer
from IPython.display import display
import matplotlib.pyplot as plt

from lafomo.datasets import DataHolder, HafnerData
from lafomo.utilities.tf import discretise, logistic, LogisticNormal, inverse_positivity
from lafomo.plot import mcmc_plotters
from lafomo.mcmc.models import TranscriptionRegulationLFM
from lafomo.configuration import MCMCConfiguration

import numpy as np
import pandas as pd


f64 = np.float64
np.set_printoptions(threshold=np.inf)
np.set_printoptions(formatter={'float': lambda x: "{0:0.10f}".format(x)})
[2]:
target_genes = [
    'KAZN','PMAIP1','PRKAB1','CSNK1G1','E2F7','SLC30A1',
    'PTP4A1','RAP2B','SUSD6','UBR5-AS1','RNF19B','AEN','ZNF79','XPC',
    'FAM212B','SESN2','DCP1B','MDM2','GADD45A','SESN1','CDKN1A','BTG2'
]
known_target_genes = [
    'CDKN1A', #p21
    'SESN1', #hPA26
    'DDB2',
    'TNFRSF10B',
    'BIK',
]
dataset = HafnerData(replicate=0, extra_targets=False, data_dir='../../../data')
num_replicates = 1
num_genes = len(dataset.gene_names)
num_tfs = 1
num_times = dataset[0][0].shape[0]

t_observed = np.arange(0, 13)
t_predict = tf.linspace(0, 13, 80)

m_observed = np.stack([
    dataset[i][1] for i in range(num_genes*num_replicates)
])
m_observed = m_observed.reshape(num_replicates, num_genes, num_times)
m_observed = np.float64(m_observed)
print(type(m_observed))
f_observed = dataset.tfs

replicate = 0

τ, common_indices = discretise(t_observed, num_disc=13)
data = DataHolder((m_observed, f_observed), None, (t_observed, τ, tf.constant(common_indices)))

print(f_observed.shape, m_observed.shape)
<class 'numpy.ndarray'>
(2, 1, 13) (1, 22, 13)
[3]:
opt = MCMCConfiguration(
    preprocessing_variance=False,
    latent_data_present=False,
    delays=False,
    weights=False,
    kinetic_exponential=False,
    initial_step_sizes={'nuts': 0.000006, 'latents': 10},
    kernel='rbf'
)

model = TranscriptionRegulationLFM(data, opt)

[4]:
start = timer()
model.sample(T=2000, burn_in=0)
end = timer()
print(f'Time taken: {(end - start):.04f}s')
----- Sampling Begins -----
Preparing HMCSampler ['basal', 'decay', 'sensitivity', 'initial', 'protein_decay']
Preparing LatentGPSampler ['latent']
Preparing GibbsSampler ['σ2_f']
Preparing GibbsSampler ['σ2_m']
WARNING:tensorflow:From C:\Users\Jacob\miniconda3\envs\wishart\lib\site-packages\tensorflow\python\ops\linalg\linear_operator_diag.py:175: calling LinearOperator.__init__ (from tensorflow.python.ops.linalg.linear_operator) with graph_parents is deprecated and will be removed in a future version.
Instructions for updating:
Do not pass `graph_parents`.  They will  no longer be used.
Progress:        100%   | "==================="""|
----- Finished -----
Time taken: 28.2938s
[5]:
do_save = False
do_load = False
if do_save:
    model.save('human')
if do_load:
    # Initialise from saved model:
    model = TranscriptionRegulationLFM.load('human', [data, opt])
    is_accepted = model.is_accepted
[6]:
is_accepted = model.sampler.is_accepted

pcs = list()
for i, subsampler in enumerate(model.subsamplers):
    pcs.append(tf.reduce_mean(tf.cast(is_accepted[i], dtype=tf.float32)).numpy())

display(pd.DataFrame([[f'{100*pc:.02f}%' for pc in pcs]], columns=list(model.subsamplers)))

plot_opt = mcmc_plotters.PlotOptions(
    num_plot_genes=10, num_plot_tfs=10,
    gene_names=np.array(dataset.gene_names), tf_names=np.array(['p53']),
    true_label='Hafner et al.', for_report=False, ylabel='normalised FPKM',
    kernel_names=model.kernel_selector.names(), num_hpd=200, tf_present=False
)
plotter = mcmc_plotters.Plotter(data, plot_opt)

# Calculate gene samples
results = model.results(burnin=300)
print(results['basal'].shape)

HMCSampler ['basal', 'decay', 'sensitivity', 'initial', 'protein_decay'] LatentGPSampler ['latent'] GibbsSampler ['σ2_f'] GibbsSampler ['σ2_m']
0 43.05% 2.65% 100.00% 100.00%
(300, 22, 1)
[7]:
print(results['decay'][-1])# delay = results.delay[-i] if self.options.delays else None
results = model.results(1000)
noise = results['σ2_m']
print(noise.shape)
plt.plot(noise[:, 0])
tf.Tensor(
[[1.4224742744]
 [1.1476091422]
 [1.3210484797]
 [1.5196166648]
 [1.4572523680]
 [1.3085409486]
 [0.9595183101]
 [1.0623496137]
 [1.5579451644]
 [1.4260603754]
 [1.3968099414]
 [1.1618437379]
 [1.2903149791]
 [1.4038266036]
 [1.5116862156]
 [1.0644957738]
 [1.6601600178]
 [1.0283721659]
 [1.0682975522]
 [1.0237053279]
 [0.4247928967]
 [1.0050527617]], shape=(22, 1), dtype=float64)
(1000, 22, 1)
[7]:
[<matplotlib.lines.Line2D at 0x1ff95a8ebc8>]
../../_images/notebooks_nonlinear_mcmc_7_2.png
[8]:
print(results['basal'][-1])
tf.Tensor(
[[1.2736846675]
 [1.1612027926]
 [1.3405749439]
 [1.5363976087]
 [1.2155577760]
 [1.3900892509]
 [1.2887085097]
 [1.3055802189]
 [1.7847837284]
 [1.5887237032]
 [1.3047584823]
 [1.7557271803]
 [1.6620260188]
 [1.4295836518]
 [1.1879562864]
 [1.3399407655]
 [1.3349149028]
 [1.3090938891]
 [1.2039111988]
 [1.7332063237]
 [10.3559737338]
 [1.8533600292]], shape=(22, 1), dtype=float64)
[9]:
m_preds = model.sample_latents(results, 20)

plotter.plot_outputs(m_preds, replicate=0, height_mul=2, indices=[5, 11, 20, 21])
# plt.legend(bbox_to_anchor=(0, -1));
../../_images/notebooks_nonlinear_mcmc_9_0.png
[10]:
kinetic_params = ['basal', 'decay', 'sensitivity']
kinetics = np.stack([results[k] for k in kinetic_params]).transpose((1, 2, 0, 3)).squeeze(-1)
mean_kinetics = np.mean(kinetics[-50:], axis=0)

dec = np.array([0.284200056, 0.399638904, 0.062061123]) #todo incorrect order
sens = np.array([4.002484948, 32.89511304, 4.297906129])
sens = np.array([
    0.232461671,0.429175332,1.913169606,0.569821512,2.139812962,0.340465324,
    4.203117214,0.635328943,0.920901229,0.263968666,1.360004451,4.816673998,
    0.294392325,2.281036308,0.86918333,2.025737447,1.225920534,11.39455009,
    4.229758095,4.002484948,32.89511304,7.836815916])
dec = np.array([
    0.260354271,0.253728801,0.268641114,0.153037374,0.472215028,0.185626363,
    0.210251586,0.211915623,0.324826082,0.207834775,0.322725728,0.370265667,
    0.221598164,0.226897275,0.409710437,0.398004589,0.357308033,0.498836353,
    0.592101838,0.284200056,0.399638904,0.463468107])

true_k = np.zeros((num_genes, 4))

dec = dec/np.mean(dec) * np.mean(mean_kinetics[:, 1])
true_k[:,2] = 1.02*dec

sens = sens/np.mean(sens)* np.mean(mean_kinetics[:, 2])
true_k[:,3] = sens
print(kinetics.shape)
transform = np.exp
transform = None
plotter.plot_kinetics(results, kinetic_params,
                      xlabels=np.array(dataset.gene_names),
                      title='Transcription ODE Kinetic Parameters',
                      true_k=true_k, transform=transform)

plotter.plot_kinetics(results, ['protein_decay'], transform=transform)
(1000, 22, 3)
C:\Users\Jacob\miniconda3\envs\wishart\lib\site-packages\arviz\stats\stats.py:496: FutureWarning: hdi currently interprets 2d data as (draw, shape) but this will change in a future release to (chain, draw) for coherence with other functions
  FutureWarning,
[10]:
(array([[0.1227541663]]), array([[[0.0001029408, 0.0000655517]]]))
../../_images/notebooks_nonlinear_mcmc_10_3.png
../../_images/notebooks_nonlinear_mcmc_10_4.png
[11]:
plotter.plot_convergence(results, kinetic_params,
                         title='Convergence of ODE Kinetic Parameters',
                         transform=transform)

f_samples = inverse_positivity(results['latent'][0])
plotter.plot_latents(f_samples, replicate=replicate)
plotter.plot_outputs(m_preds, replicate=replicate)
No handles with labels found to put in legend.
No handles with labels found to put in legend.
No handles with labels found to put in legend.
../../_images/notebooks_nonlinear_mcmc_11_1.png
../../_images/notebooks_nonlinear_mcmc_11_2.png
../../_images/notebooks_nonlinear_mcmc_11_3.png
[11]:
print(f_samples.shape)
(1000, 2, 1, 169)
[12]:
# from IPython.display import HTML
#
# HTML(plotter.anim_latent(f_samples))
[13]:
plt.figure()
plotter.plot_grn(results, kinetic_params)

../../_images/notebooks_nonlinear_mcmc_14_0.png
[14]:
results['sensitivity'] *= 2
print(results['sensitivity'][-1])
tf.Tensor(
[[3.7869732760]
 [1.5948173528]
 [2.5829769128]
 [2.9097265254]
 [3.4280450937]
 [3.1466930544]
 [3.2407840668]
 [1.3438523987]
 [2.6966521072]
 [2.5841391496]
 [2.0323563896]
 [1.5513953097]
 [4.9351893593]
 [3.2298924325]
 [1.8849046920]
 [2.4678941589]
 [0.9910152587]
 [1.9445042607]
 [4.4556294380]
 [4.9614972413]
 [4.4979334447]
 [3.0348057021]], shape=(22, 1), dtype=float64)
[15]:
# plt.figure()
# kp = np.array(results.kernel_params).swapaxes(0,1)
# kp_latest = np.mean(kp[-50:], axis=0)
# self.plot_bar_hpd(kp, kp_latest, self.opt.kernel_names)


[16]:
# Plot proteins
p_samples = model.sample_proteins(results, 20)[:,0]
print(p_samples.shape)
plotter.plot_samples(p_samples, [''], 4, color='orangered')
plt.xlim(0, 10)
plt.ylim(-0.05, 2.5)
print(8.51/4.72)
plt.figure(figsize=(4*1.80297, 4))
p = p_samples[-1]
y = [1.5, 4.8, 13.7, 5, 2, 1.4, 3.2, 4, 1.4, 1.5]
# y = y/np.mean(y)*np.mean(p) * 1.75-0.16
# y = scaler.fit_transform(np.expand_dims(y, 0))
plt.plot(t[1:11], y)
plt.ylim(0, 15)
plt.ylabel('p53 fold change')
plt.xlabel('Time (h)')
plt.xticks(np.arange(1, 11))
plt.yticks(np.arange(0, 16, 5))

plt.tight_layout()
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-16-d7c02cd8c583> in <module>
      1 # Plot proteins
----> 2 p_samples = model.sample_proteins(results, 20)[:,0]
      3 print(p_samples.shape)
      4 plotter.plot_samples(p_samples, [''], 4, color='orangered')
      5 plt.xlim(0, 10)

~\Documents\proj\lafomo\lafomo\mcmc\models\transcriptional.py in sample_proteins(self, results, num_results)
    277         for i in range(1, num_results + 1):
    278             delta = results['delay'][i] if results['delay'] is not None else None
--> 279             p_samples.append(self.likelihood.calculate_protein(results.fbar[-i],
    280                                                                results.k_fbar[-i], delta))
    281         return np.array(p_samples)

KeyError: 'delay'
[ ]:
from tensorflow_probability import distributions as tfd
from lafomo.utilities.tf import jitter_cholesky, inverse_positivity

kparams = model.kernel_selector.initial_params()
print(kparams[0])

step_size = 1 * tf.ones(169, dtype='float64')
latent_sampler = model.subsamplers[1]

current_state = 0.3* tf.ones((2, 1, 169), dtype='float64')

new_state = tf.identity(current_state)
new_params = []
S = tf.linalg.diag(step_size)
invS = tf.expand_dims(tf.linalg.inv(S), 0)

# MH
m, K = latent_sampler.fbar_prior_params(kparams[0], kparams[1])
# Propose new params
v = model.kernel_selector.proposal(0, kparams[0]).sample()
l2 = model.kernel_selector.proposal(1, kparams[1]).sample()
m_, K_ = latent_sampler.fbar_prior_params(v, l2)
print(v)
# Draw surrogate data
fbar = new_state
g = tfd.MultivariateNormalDiag(fbar, step_size).sample()
# assume only one TF:
g = tf.transpose(g, (0, 2, 1))
fbar = tf.transpose(fbar, (0, 2, 1))
# g is quite a large random sample centred around fbar with
# noise proportional to step_size
# plt.plot(g[0][0])

# Compute K_i(K_i + S)^-1
R = S - tf.matmul(S, tf.matmul(tf.linalg.inv(S+K), S))
L_R = jitter_cholesky(R)
R_ = S - tf.matmul(S, tf.matmul(tf.linalg.inv(S+K_), S))
L_R_ = jitter_cholesky(R_)

print(R.shape, invS.shape, g.shape)
m_g = tf.matmul(tf.matmul(R, invS), g)
m_g_ = tf.matmul(tf.matmul(R_, invS), g)

plt.plot(m_g[0,:,0])
plt.plot(m_g_[0,:,0])

nu = tf.matmul(tf.linalg.inv(L_R), (fbar-m_g))
print(fbar.shape, m_g.shape, (fbar-m_g).shape)
# Compute chol(K-K(K+S)^-1 K)
# L = jitter_cholesky(K-tf.matmul(Ksuminv, K))
# c_mu = tf.linalg.matvec(Ksuminv, g)
# # Compute nu = L^-1 (f-mu)
# invL = tf.linalg.inv(L)
# nu = tf.linalg.matvec(invL, fbar-c_mu)
#
# Ksuminv = tf.matmul(K_, tf.linalg.inv(K_+S))
# L = jitter_cholesky(K_-tf.matmul(K_, Ksuminv))
# c_mu = tf.linalg.matvec(Ksuminv, g)
print(L_R_.shape, nu.shape, tf.linalg.matmul(L_R_, nu).shape)
fstar = tf.linalg.matmul(L_R_, nu) + m_g_
print(fstar.shape)
fstar = tf.transpose(fstar, (0, 2, 1))
plt.figure()
plt.plot(inverse_positivity(fstar)[0][0])
print('final shape', fstar.shape)
[ ]: