-
Notifications
You must be signed in to change notification settings - Fork 4
/
MCMCEnv.h
274 lines (212 loc) · 10.6 KB
/
MCMCEnv.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
#ifndef MCMCENV_H
#define MCMCENV_H
#pragma once
#include "ClusterTree.h"
#include "ClusterFlags.h"
#include "Matrix.h"
#include <sstream>
#ifdef LINUX
#include <tr1/unordered_map>
#else
#include <unordered_map>
#endif
#define TREE_ID_SIZE 4
class MCMCEnv
{
public:
typedef std::vector<bool> TaoSet;
typedef std::tr1::unordered_map<unsigned long long, ClusterTree> TreeContainer;
typedef std::vector<std::pair<unsigned long, unsigned long long> > TreeSet;
// time and ID
typedef std::vector<std::pair<unsigned long long, unsigned long long> > TreeMergeSet;
// parent ID and child ID
typedef struct {
std::vector<std::string> display;
unsigned long rootLine;
unsigned long birthTime;
} TreeDisplay;
private:
typedef std::pair<unsigned long, unsigned long> TimeCoordinate;
typedef std::map<unsigned long, unsigned long> SortedULContainer;
typedef std::map<unsigned long long, char> RootIDContainer; // char is a dummy holder
TreeContainer MapClusterTrees;
unsigned long long ID_max;
RootIDContainer rootIDs;
// Notice that the row of flags is time, column is sample
//ClusterFlags Flags;
// tao
TaoSet Tao;
static unsigned long NumOfTP;
static unsigned long NumOfGenes;
static unsigned long NumOfData;
static ClusterTree::NumOfSampleContainer DataNumEachTP; // number of data in each time point
static SortedULContainer TPStartIndex, ReverseTPStartIndex; // the starting index for each time point
MCMCEnv(const TaoSet &tao, const ClusterFlags &flags, const matrix &data);
friend class mcmc;
// The mutable variants are the values intended to speed up the calculation process
mutable bool UpToDate; // if true, no calculation is needed
mutable double LogDensityValue;
mutable double LogDensityValueNonFlag;
mutable double LogDensityValueFlag;
static matrix mu1;
static matrix mu0;
static matrix sigma0;
static matrix sigma1;
static double beta0;
static double beta1;
static double deltar;
static double alpha;
static double shape;
static double bk; // For split_merge move and birth_death move, we give equal probability.
static double ranc; // Tail birth is given much more porbability.
static double k1;
static double k0;
static double constSigma0;
static double constSigma1;
static bool buildForest; // whether to build a forest or simply a tree
const matrix &Data;
TimeCoordinate IndexToCoor(unsigned long index) const;
unsigned long CoorToIndex(const TimeCoordinate &corr) const;
static const unsigned int TreeNodeWidth = TREE_ID_SIZE + 2;
// /- O ----- O , this is to specify the number of '-' s here,
// for branching, the rest will be filled with this
TreeDisplay &attachTrees(TreeDisplay &parent, const TreeDisplay &child,
bool attachedToTop) const;
TreeDisplay writeSingleTree(unsigned long long TreeID, bool topPrefered = false) const;
public:
ClusterFlags Flags;
static MCMCEnv initMCMCEnv(unsigned long numOfTimePoints, unsigned long numOfGenes,
unsigned long numOfData, const std::vector<unsigned long> &numOfDataInEachTimePoint,
const TaoSet &tao, const ClusterFlags &flags, const matrix &p_data);
MCMCEnv(const MCMCEnv &oldenv); // duplicate the environment
~MCMCEnv(void);
MCMCEnv &operator=(const MCMCEnv &oldenv);
static unsigned long getDataNum();
matrix reData(unsigned long long flag, bool tao_NULL, const TaoSet &newTao,
std::vector<unsigned long> &taoIndices) const;
matrix reData(unsigned long long flag, bool tao_NULL, const TaoSet &newTao, std::ostream &os) const;
matrix datacov(unsigned long long flag, bool tao_NULL, const TaoSet &newTao) const;
matrix datacov(unsigned long long flag, bool tao_NULL, const TaoSet &newTao, std::ostream &os) const;
static void initializeParameters(const matrix &p_mu1, const matrix &p_mu0, const matrix &p_sigma1,
const matrix &p_sigma0, double p_beta1, double p_beta0, double p_deltar, double p_alpha,
double p_shape, double p_bk, double p_ranc, bool bForest);
inline ClusterTree &getTreeFromID(unsigned long long ID) {
if(MapClusterTrees.find(ID) == MapClusterTrees.end()) {
std::ostringstream ostr;
ostr << "ID " << ID << " not in the list!";
throw(std::logic_error(ostr.str()));
}
return MapClusterTrees.find(ID)->second;
}
inline const ClusterTree &getTreeFromID(unsigned long long ID) const {
if(MapClusterTrees.find(ID) == MapClusterTrees.end()) {
std::ostringstream ostr;
ostr << "ID " << ID << " not in the list!";
throw(std::logic_error(ostr.str()));
}
return MapClusterTrees.find(ID)->second;
}
//inline ClusterTree &getTreeHead();
//inline const ClusterTree &getTreeHead() const;
ClusterTree &createTree(unsigned long long parentID, unsigned long time);
ClusterTree &createTree();
void removeTree(unsigned long long TreeID);
// get the set that can be split, merged, born or killed
TreeSet getSplitSet(bool hasForest, bool forestOnly) const;
//TreeSet getTreeSplitSet() const; // this is to get the new tree (separate process)
TreeMergeSet getMergeSet(bool hasForest, bool forestOnly) const;
//TreeMergeSet getTreeMergeSet() const;
TreeMergeSet getDeathSet(bool hasForest, bool forestOnly) const;
TreeSet getTailBirthSet(bool hasForest, bool forestOnly) const; // time actually doesn't matter here
TreeSet getTailDeathSet(bool hasForest, bool forestOnly) const; // time and ID
// the return value is the new tree / branch ID
unsigned long long flagSplit(double &P_alloc, bool &NULLset, double &f_ui,
const TreeSet::value_type &split_pair);
void flagSplitTest(double &P_alloc, bool &NULLset, double &f_ui,
const TreeSet::value_type &split_pair);
//void flagRootSplit(double &P_alloc, bool &NULLset, double &f_ui, unsigned long long FromID);
void flagMerge(double &P_alloc, double &f_ui, const TreeMergeSet::value_type &merge_pair);
//void flagRootMerge(double &P_alloc, double &f_ui, const TreeMergeSet::value_type &merge_pair);
void flagBirth(const TreeSet::value_type &splitPair,
double &weightratio, double &jacobi, double &f_wstar);
void flagBirth(const TreeSet::value_type &splitPair);
void flagTailBirth(const TreeSet::value_type &splitPair,
double &weightratio, double &jacobi, double &f_wstar);
void flagDeath(const TreeMergeSet::value_type &merge_pair,
double &jacobi, double &weightratio, double &f_wstar);
void flagTailDeath(const TreeSet::value_type &merge_pair,
double &jacobi, double &weightratio, double &f_wstar);
void testNumberSet();
long getNoZeroWeights(unsigned long time) const;
double apSplit(const TreeSet::value_type &splitPair, const bool NULLset, const TreeSet &splitsets,
const long splitnum, const double P_alloc, const double f_ui,
const MCMCEnv &oldenv, unsigned long long newTreeID) const;
double apMerge(const TreeMergeSet::value_type &mergePair, const TreeSet &splitSetsAfterMerge,
const long mergesetsrows, const long splitnumaftermerge, const double P_alloc, const double f_ui,
const MCMCEnv &oldenv) const;
double apBirth(const TreeSet::value_type &splitPair, const long splitnumafterbirth,
const double weightratio, const double jacobi, const double f_wstar,
const MCMCEnv &oldenv) const;
double apDeath(const TreeMergeSet::value_type &mergePair, const long emptynumbeforedeath,
const double weightratio, const double jacobi, const double f_wstar,
const MCMCEnv &oldenv) const;
double apTailBirth(const TreeSet::value_type &splitPair, const long splitnumafterbirth,
const double weightratio, const double jacobi, const double f_wstar,
const MCMCEnv &oldenv) const;
double apTailDeath(const TreeSet::value_type &mergePair, const long emptynumbeforedeath,
const double weightratio, const double jacobi, const double f_wstar,
const MCMCEnv &oldenv) const;
double apRootSplit(unsigned long long FromID, const bool NULLset, const TreeSet &splitsets,
const long splitnum, const double P_alloc, const double f_ui,
const MCMCEnv &oldenv) const;
double apRootMerge(const TreeMergeSet::value_type &mergePair, const TreeSet &splitSetsAfterMerge,
const long mergesetsrows, const long splitnumaftermerge, const double P_alloc, const double f_ui,
const MCMCEnv &oldenv) const;
double apRootBirth(const long splitnumafterbirth,
const double weightratio, const double jacobi, const double f_wstar,
const MCMCEnv &oldenv) const;
double apRootDeath(const TreeMergeSet::value_type &mergePair, const long emptynumbeforedeath,
const double weightratio, const double jacobi, const double f_wstar,
const MCMCEnv &oldenv) const;
double calcLogDensity() const;
double calcLogDensity(const TaoSet &newTao) const;
long taoCount(const TaoSet &newTao) const;
long taoCount() const;
//void changeSigmaToNewTao(const TaoSet &newTao) const;
double calcLogDensityNonFlagPart(long taonum) const;
double calcLogDensityFlagPart(long taonum) const;
double calcLogDensityNonFlagPart(const TaoSet &newTao, long taonum) const;
double calcLogDensityFlagPart(const TaoSet &newTao, long taonum) const;
//void returnSigmaFromTao() const;
double calcPChos(const TreeMergeSet::value_type &parentChildPair, const bool NULLset,
const TreeSet &SplitSet) const;
double calcSimilarity(unsigned long time, const ClusterTree &parent, const ClusterTree &child,
const bool NULLset, const TreeSet &SplitSet) const;
double calcClusterDist(const unsigned long time1, const ClusterTree &cluster1,
const long time2, const ClusterTree &cluster2) const;
matrix calcClusterMean(const unsigned long time, const ClusterTree &cluster) const;
unsigned long getClusterNumber() const; // get the number of trees
unsigned long getNonEmptyClusterNumber() const;
// get the number of trees that are not empty throughout the time
unsigned long getClusterNumber(unsigned long time) const; // get the number of trees
unsigned long getNonEmptyClusterNumber(unsigned long time) const;
// get the number of trees that are not empty throughout the time
double getWeightFromSample(unsigned long time, unsigned long sampleNum) const;
const ClusterTree &getBranchFromSample(unsigned long time, unsigned long sampleNum) const;
void sampleWeight();
void sampleTao();
void sampleZ();
double treeSummary() const;
unsigned long getTreeNumbers() const;
std::ostream &writeTree(std::ostream &os) const;
std::ostream &writeTao(std::ostream &os) const;
std::ostream &writeFlags(std::ostream &os) const;
std::ostream &writeWeights(std::ostream &os) const;
std::ostream &writeStatus(std::ostream &os) const;
double & weight(unsigned long long ID, unsigned long time);
std::ostream &writeSet(std::ostream &os, const TreeSet &set) const;
std::ostream &writeSet(std::ostream &os, const TreeMergeSet &set) const;
friend std::ostream &operator<<(std::ostream &os, const MCMCEnv &env);
};
std::ostream &operator<<(std::ostream &os, const MCMCEnv &env);
#endif