-
Notifications
You must be signed in to change notification settings - Fork 25
/
lemon_slice_reversible.py
161 lines (139 loc) · 6.33 KB
/
lemon_slice_reversible.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import numpy as np
import matplotlib.pyplot as plt
import scikit_tt.data_driven.transform as tdt
from scikit_tt.data_driven import tgedmd
from msmtools.analysis.dense.pcca import _pcca_connected_isa
""" NOTE: This script requires the system class for the Lemon Slice potential and the data. These are
available from the authors upon request. A script to visualize the
results and produce the figures in [1] is available on request.
Application of tgEDMD (algorithm 2 in [1]) to simulation data of a four-dimensional toy system. The dynamics is
overdamped Langevin dynamics, with potential energy given by the two-dimensional "Lemon Slice"
potential along the first two coordinates, and de-coupled harmonic potentials along the remaining
two coordinates.
The basis set consists of Gaussian functions along all coordinate directions. Several choices of
the truncation threshold for the HOSVD of the data tensor are tested. The resulting implied time
scales, eigenfunctions, ranks, and PCCA memberships are stored to file.
References:
[1] Lücke, M. and Nüske, F. tgEDMD: Approximation of the Kolmogorov Operator in Tensor Train Format,
arxiv 2111.09606 (2022)
"""
""" System Settings: """
# Number of dimensions:
d = 4
# Diffusion constant:
beta = 1.0
# Spring constant for harmonic parts of the potential
alpha = 10.0
# Pre-factor for Lemon Slice:
c = 1.0
# Number of minima for Lemon Slice:
k = 4
""" Computational Settings: """
# Location of the data:
directory = "/home/nueske/tgEDMD_Paper/Results_LemonSlice/"
# Directory for storing results:
res_dir = "/home/nueske/tgEDMD_Paper/Results_LemonSlice/"
# Directory for PCCA plots:
fig_dir = "/home/nueske/tgEDMD_Paper/Figures/"
# Integration time step:
dt = 1e-3
# Number of time steps (full data and downsampled data):
m = 300000
m_tgedmd = 3000
delta = int(round(m / m_tgedmd))
# Number of independent simulations:
ntraj = 10
# List of truncation parameters:
eps_list = [1e-8, 1e-6, 1e-4, 1e-3, 1e-2]
# Set maximal rank:
max_rank = 1000
# Number of timescales to record:
nits = 3
# Lag times for MSM construction:
lags_msm = np.array([1, 2, 5, 10, 20, 50, 100])
# Frequency of progress messages during computation:
output_freq = 750
""" Definition of Gaussian basis functions """
# Mean values and variance of Gaussians for first two coordinates:
mean_ls = np.arange(-1.2, 1.21, 0.4)
sig_ls = 0.4
# Mean values and variance of Gaussians for last two coordinates:
mean_quad = np.arange(-1.0, 1.01, 0.5)
sig_quad = 0.5
basis_list = []
for i in range(2):
basis_list.append([tdt.GaussFunction(i, mean_ls[j], sig_ls) for j in range(len(mean_ls))])
for i in range(2, 4):
basis_list.append([tdt.GaussFunction(i, mean_quad[j], sig_quad) for j in range(len(mean_quad))])
""" Load the data: """
# Full data set:
data = np.load(directory + "Simulation_LS_Full.npy")
# Downsampled data set:
data_tgedmd = np.load(directory + "Simulation_LS_delta_%d.npy"%delta)
""" Run tgEDMD """
# Create arrays for timescales, eigenfunctions, TT-ranks and PCCA memberships:
timescales = np.zeros((ntraj, len(eps_list), nits))
eigfuns = np.zeros((ntraj, len(eps_list), nits+1, m_tgedmd))
ranks = np.zeros((ntraj, len(eps_list), d + 2))
chi = np.zeros((ntraj, len(eps_list), nits+1, m_tgedmd), dtype=bool)
# Run reversible tgEDMD for each trajectory independently:
for ii in range(ntraj):
print("Analyzing data for trajectory %d..."%ii)
# Calculate the diffusion matrix for each time step (equals a constant multiple of the identity):
diffusion = np.repeat(np.sqrt(2.0 / beta)*np.eye(d)[:, :, None], m_tgedmd, 2)
qq = 0
for eps in eps_list:
eigvals, traj_eigfuns, ranks_ii = tgedmd.amuset_hosvd(data_tgedmd[ii, :, :], basis_list, diffusion, b=None,
reweight=None, num_eigvals=5,
threshold=np.sqrt(m_tgedmd) * eps,
max_rank=max_rank, return_option='eigenfunctionevals',
output_freq=output_freq)
# Store timescales, eigenfunctions, and ranks:
timescales[ii, qq, :] = [-1.0 / kappa for kappa in eigvals[1:nits+1]]
eigfuns[ii, qq, :, :] = traj_eigfuns[:nits+1, :]
ranks[ii, qq, :] = np.array(ranks_ii)
print('Implied time scales for eps = %.2e : ' % eps, timescales[ii, qq, :])
# Use PCCA to compute metastable memberships:
# Select eigenfunctions:
traj_eigfuns = eigfuns[ii, qq, :, :]
# Remove possible fluctuations along first eigenvector trajectory:
diffs = np.abs(np.max(traj_eigfuns.T, axis=0) - np.min(traj_eigfuns.T, axis=0))
if diffs[0] > 1e-6:
traj_eigfuns[0, :] = traj_eigfuns[0, 0] * np.ones((traj_eigfuns.shape[1]))
# Apply PCCA:
chi_qq, _ = _pcca_connected_isa(traj_eigfuns.T, nits + 1)
chi_qq = chi_qq.T
# Identify state with maximal membership for each time step (hard assignment):
for i in range(chi_qq.shape[1]):
ind = np.argmax(chi_qq[:, i])
chi_qq[:, i] = np.zeros((chi_qq.shape[0],))
chi_qq[ind, i] = 1
chi[ii, qq, :, :] = chi_qq.astype(bool)
qq += 1
print(" ")
# Save Results:
dic = {}
dic["eps"] = eps_list
dic["timescales"] = timescales
dic["eigfuns"] = eigfuns
dic["ranks"] = ranks
dic["chi"] = chi
np.savez_compressed(res_dir + "Results_LS_Mem.npz", **dic)
""" Create plots of PCCA states for all settings: """
for ii in range(ntraj):
qq = 0
for eps in eps_list:
plt.figure(dpi=100)
plt.plot(data_tgedmd[ii, 0, :][chi[ii, qq, 0, :]], data_tgedmd[ii, 1, :][chi[ii, qq, 0, :]], 'bx')
plt.plot(data_tgedmd[ii, 0, :][chi[ii, qq, 1, :]], data_tgedmd[ii, 1, :][chi[ii, qq, 1, :]], 'r*')
plt.plot(data_tgedmd[ii, 0, :][chi[ii, qq, 2, :]], data_tgedmd[ii, 1, :][chi[ii, qq, 2, :]], 'g2')
plt.plot(data_tgedmd[ii, 0, :][chi[ii, qq, 3, :]], data_tgedmd[ii, 1, :][chi[ii, qq, 3, :]], 'y+')
plt.grid()
plt.xlabel(r"$x^1$")
plt.ylabel(r"$x^2$")
plt.xlim([-1.5, 1.5])
plt.ylim([-1.5, 1.5])
plt.title("PCCA traj_id %d, eps = %.1e"%(ii, eps))
plt.savefig(fig_dir + "PCCA_traj_id_%d_eps_%.1e.pdf"%(ii, eps), bbox_inches="tight")
qq += 1
plt.show()