-
Notifications
You must be signed in to change notification settings - Fork 4
/
clickhouse_materialized_views.sql
335 lines (312 loc) · 15.6 KB
/
clickhouse_materialized_views.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
DROP TABLE IF EXISTS sample_clinical_attribute_numeric;
DROP TABLE IF EXISTS sample_clinical_attribute_categorical;
DROP TABLE IF EXISTS patient_clinical_attribute_numeric;
DROP TABLE IF EXISTS patient_clinical_attribute_categorical;
DROP TABLE IF EXISTS sample_columnstore;
DROP TABLE IF EXISTS sample_list_columnstore;
DROP TABLE IF EXISTS genomic_event;
DROP VIEW IF EXISTS sample_clinical_attribute_numeric_mv;
DROP VIEW IF EXISTS sample_clinical_attribute_categorical_mv;
DROP VIEW IF EXISTS patient_clinical_attribute_numeric_mv;
DROP VIEW IF EXISTS patient_clinical_attribute_categorical_mv;
DROP VIEW IF EXISTS sample_columnstore_mv;
DROP VIEW IF EXISTS sample_list_columnstore_mv;
DROP VIEW IF EXISTS genomic_event_mutation_mv;
DROP VIEW IF EXISTS genomic_event_cna_mv;
DROP VIEW IF EXISTS genomic_event_struct_var_mv;
CREATE TABLE sample_clinical_attribute_numeric (
sample_unique_id VARCHAR(45),
patient_unique_id VARCHAR(45),
attribute_name VARCHAR(45),
attribute_value FLOAT,
cancer_study_identifier VARCHAR(45)
)
ENGINE = MergeTree()
ORDER BY (sample_unique_id, patient_unique_id, attribute_name, cancer_study_identifier);
INSERT INTO sample_clinical_attribute_numeric
SELECT
concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id,
concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id,
clinical_sample.attr_id as attribute_name,
cast(clinical_sample.attr_value as float) as attribute_value,
cs.cancer_study_identifier as cancer_study_identifier
FROM cancer_study cs
INNER JOIN patient p on cs.cancer_study_id = p.cancer_study_id
INNER JOIN sample s on p.internal_id = s.patient_id
INNER JOIN clinical_sample ON s.internal_id = clinical_sample.internal_id
WHERE match(clinical_sample.attr_value, '^[\d\.]+$');
CREATE MATERIALIZED VIEW sample_clinical_attribute_numeric_mv
TO sample_clinical_attribute_numeric AS
SELECT
concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id,
concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id,
clinical_sample.attr_id as attribute_name,
cast(clinical_sample.attr_value as float) as attribute_value,
cs.cancer_study_identifier as cancer_study_identifier
FROM cancer_study cs
INNER JOIN patient p on cs.cancer_study_id = p.cancer_study_id
INNER JOIN sample s on p.internal_id = s.patient_id
INNER JOIN clinical_sample ON s.internal_id = clinical_sample.internal_id
WHERE match(clinical_sample.attr_value, '^[\d\.]+$');
CREATE TABLE sample_clinical_attribute_categorical (
sample_unique_id VARCHAR(45),
patient_unique_id VARCHAR(45),
attribute_name VARCHAR(45),
attribute_value VARCHAR(45),
cancer_study_identifier VARCHAR(45)
)
ENGINE = MergeTree()
ORDER BY (sample_unique_id, patient_unique_id, attribute_name, cancer_study_identifier);
INSERT INTO sample_clinical_attribute_categorical
SELECT
concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id,
concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id,
cl.attr_id as attribute_name,
cl.attr_value as attribute_value,
cs.cancer_study_identifier as cancer_study_identifier
FROM cancer_study cs
INNER JOIN patient p on cs.cancer_study_id = p.cancer_study_id
INNER JOIN sample s on p.internal_id = s.patient_id
INNER JOIN clinical_sample cl on s.internal_id = cl.internal_id
WHERE NOT match(cl.attr_value, '^[\d\.]+$');
CREATE MATERIALIZED VIEW sample_clinical_attribute_categorical_mv
TO sample_clinical_attribute_categorical AS
SELECT
concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id,
concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id,
cl.attr_id as attribute_name,
cl.attr_value as attribute_value,
cs.cancer_study_identifier as cancer_study_identifier
FROM cancer_study cs
INNER JOIN patient p on cs.cancer_study_id = p.cancer_study_id
INNER JOIN sample s on p.internal_id = s.patient_id
INNER JOIN clinical_sample cl on s.internal_id = cl.internal_id
WHERE NOT match(cl.attr_value, '^[\d\.]+$');
CREATE TABLE patient_clinical_attribute_numeric (
patient_unique_id VARCHAR(45),
attribute_name VARCHAR(45),
attribute_value FLOAT,
cancer_study_identifier VARCHAR(45)
)
ENGINE = MergeTree()
ORDER BY (patient_unique_id, attribute_name, cancer_study_identifier);
INSERT INTO patient_clinical_attribute_numeric
SELECT
concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id,
cp.attr_id as attribute_name,
cast(cp.attr_value as float) as attribute_value,
cs.cancer_study_identifier as cancer_study_identifier
FROM cancer_study cs
INNER JOIN patient p on cs.cancer_study_id = p.cancer_study_id
INNER JOIN clinical_patient cp on p.internal_id = cp.internal_id
WHERE match(cp.attr_value, '^[\d\.]+$');
CREATE MATERIALIZED VIEW patient_clinical_attribute_numeric_mv
TO patient_clinical_attribute_numeric AS
SELECT
concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id,
cp.attr_id as attribute_name,
cast(cp.attr_value as float) as attribute_value,
cs.cancer_study_identifier as cancer_study_identifier
FROM cancer_study cs
INNER JOIN patient p on cs.cancer_study_id = p.cancer_study_id
INNER JOIN clinical_patient cp on p.internal_id = cp.internal_id
WHERE match(cp.attr_value, '^[\d\.]+$');
CREATE TABLE patient_clinical_attribute_categorical (
patient_unique_id VARCHAR(45),
attribute_name VARCHAR(45),
attribute_value VARCHAR(45),
cancer_study_identifier VARCHAR(45)
)
ENGINE = MergeTree()
ORDER BY (patient_unique_id, attribute_name, cancer_study_identifier);
INSERT INTO patient_clinical_attribute_categorical
SELECT
concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id,
cp.attr_id as attribute_name,
cp.attr_value as attribute_value,
cs.cancer_study_identifier as cancer_study_identifier
FROM cancer_study cs
INNER JOIN patient p on cs.cancer_study_id = p.cancer_study_id
INNER JOIN clinical_patient cp on p.internal_id = cp.internal_id
WHERE NOT match(cp.attr_value, '^[\d\.]+$');
CREATE MATERIALIZED VIEW patient_clinical_attribute_categorical_mv
TO patient_clinical_attribute_categorical AS
SELECT
concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id,
cp.attr_id as attribute_name,
cp.attr_value as attribute_value,
cs.cancer_study_identifier as cancer_study_identifier
FROM cancer_study cs
INNER JOIN patient p on cs.cancer_study_id = p.cancer_study_id
INNER JOIN clinical_patient cp on p.internal_id = cp.internal_id
WHERE NOT match(cp.attr_value, '^[\d\.]+$');
CREATE TABLE IF NOT EXISTS sample_columnstore (
sample_unique_id VARCHAR(45),
sample_unique_id_base64 VARCHAR(45),
sample_stable_id VARCHAR(45),
patient_unique_id VARCHAR(45),
patient_unique_id_base64 VARCHAR(45),
patient_stable_id VARCHAR(45),
cancer_study_identifier VARCHAR(45))
ENGINE = MergeTree
ORDER BY (sample_unique_id, patient_unique_id, cancer_study_identifier);
INSERT INTO sample_columnstore
SELECT
concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id,
base64Encode(sample.stable_id) as sample_unique_id_base64,
sample.stable_id as sample_stable_id,
concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id,
p.stable_id as patient_stable_id,
base64Encode(p.stable_id) as patient_unique_id_base64,
cs.cancer_study_identifier as cancer_study_identifier
FROM sample
INNER JOIN patient p on sample.patient_id = p.internal_id
INNER JOIN cancer_study cs on p.cancer_study_id = cs.cancer_study_id;
CREATE MATERIALIZED VIEW sample_columnstore_mv TO sample_columnstore AS
SELECT
concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id,
sample.stable_id as sample_stable_id,
base64Encode(sample.stable_id) as sample_unique_id_base64,
concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id,
p.stable_id as patient_stable_id,
base64Encode(p.stable_id) as patient_unique_id_base64,
cs.cancer_study_identifier as cancer_study_identifier
FROM sample
INNER JOIN patient p on sample.patient_id = p.internal_id
INNER JOIN cancer_study cs on p.cancer_study_id = cs.cancer_study_id;
CREATE TABLE IF NOT EXISTS sample_list_columnstore (
sample_unique_id VARCHAR(45),
sample_list_stable_id VARCHAR(45),
name VARCHAR(45),
cancer_study_identifier VARCHAR(45))
ENGINE = MergeTree
ORDER BY (sample_unique_id, sample_list_stable_id, name, cancer_study_identifier);
INSERT INTO sample_list_columnstore
SELECT
concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id,
sl.stable_id as sample_list_stable_id,
sl.name as name,
cs.cancer_study_identifier as cancer_study_identifier
FROM sample_list as sl
INNER JOIN sample_list_list AS sll on sll.list_id = sl.list_id
INNER JOIN sample AS s on s.internal_id = sll.sample_id
INNER JOIN cancer_study cs on sl.cancer_study_id = cs.cancer_study_id;
CREATE MATERIALIZED VIEW sample_list_columnstore_mv TO sample_list_columnstore AS
SELECT
concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id,
sl.stable_id as sample_list_stable_id,
sl.name as name,
cs.cancer_study_identifier as cancer_study_identifier
FROM sample_list as sl
INNER JOIN sample_list_list AS sll on sll.list_id = sl.list_id
INNER JOIN sample AS s on s.internal_id = sll.sample_id
INNER JOIN cancer_study cs on sl.cancer_study_id = cs.cancer_study_id;
DROP TABLE IF EXISTS genomic_event ;
CREATE TABLE IF NOT EXISTS genomic_event (
sample_unique_id VARCHAR(45),
variant VARCHAR(45),
variant_type VARCHAR(45),
hugo_gene_symbol VARCHAR(45),
gene_panel_stable_id VARCHAR(45),
cancer_study_identifier VARCHAR(45),
genetic_profile_stable_id VARCHAR(45))
ENGINE = MergeTree
ORDER BY (sample_unique_id, variant, hugo_gene_symbol, cancer_study_identifier, genetic_profile_stable_id);
INSERT INTO genomic_event
SELECT
concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id,
me.protein_change as variant,
'mutation' as variant_type,
gene.hugo_gene_symbol as hugo_gene_symbol,
gp.stable_id as gene_panel_stable_id,
cs.cancer_study_identifier as cancer_study_identifier,
g.stable_id as genetic_profile_stable_id
FROM mutation
LEFT JOIN mutation_event as me on mutation.mutation_event_id = me.mutation_event_id
LEFT JOIN sample_profile sp on mutation.sample_id = sp.sample_id and mutation.genetic_profile_id = sp.genetic_profile_id
LEFT JOIN gene_panel gp on sp.panel_id = gp.internal_id
LEFT JOIN genetic_profile g on sp.genetic_profile_id = g.genetic_profile_id
LEFT JOIN cancer_study cs on g.cancer_study_id = cs.cancer_study_id
LEFT JOIN sample on mutation.sample_id = sample.internal_id
LEFT JOIN gene on mutation.entrez_gene_id = gene.entrez_gene_id
UNION ALL
SELECT
concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id,
toString(ce.alteration) as variant,
'cna' as variant_type,
gene.hugo_gene_symbol as hugo_gene_symbol,
gene_panel.stable_id as gene_panel_stable_id,
cs.cancer_study_identifier as cancer_study_identifier,
gp.stable_id as genetic_profile_stable_id
FROM sample_cna_event
LEFT JOIN cna_event ce on sample_cna_event.cna_event_id = ce.cna_event_id
LEFT JOIN gene on ce.entrez_gene_id = gene.entrez_gene_id
LEFT JOIN genetic_profile gp on sample_cna_event.genetic_profile_id = gp.genetic_profile_id
LEFT JOIN sample_profile sp on gp.genetic_profile_id = sp.genetic_profile_id
LEFT JOIN cancer_study cs on gp.cancer_study_id = cs.cancer_study_id
LEFT JOIN sample on sample_cna_event.sample_id = sample.internal_id
LEFT JOIN gene_panel on sp.panel_id = gene_panel.internal_id
UNION ALL
SELECT
concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id,
hugo_gene_symbol,
event_info as variant,
g.stable_id as gene_panel_stable_id,
cs.cancer_study_identifier as cancer_study_identifier,
gp.stable_id as genetic_profile_stable_id
FROM structural_variant as sv
LEFT JOIN (select entrez_gene_id, hugo_gene_symbol as hugo_gene_symbol from gene) gene1 on gene1.entrez_gene_id = sv.site1_entrez_gene_id
LEFT OUTER JOIN genetic_profile gp on gp.genetic_profile_id = sv.genetic_profile_id
LEFT JOIN sample s on sv.sample_id = s.internal_id
LEFT JOIN cancer_study cs on gp.cancer_study_id = cs.cancer_study_id
LEFT JOIN sample_profile sp on gp.genetic_profile_id = sp.genetic_profile_id
LEFT JOIN gene_panel g on sp.panel_id = g.internal_id;
CREATE MATERIALIZED VIEW genomic_event_mutation_mv TO genomic_event AS
SELECT
concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id,
me.protein_change as variant,
'mutation' as variant_type,
gene.hugo_gene_symbol as hugo_gene_symbol,
gp.stable_id as gene_panel_stable_id,
cs.cancer_study_identifier as cancer_study_identifier,
g.stable_id as genetic_profile_stable_id
FROM mutation
LEFT JOIN mutation_event as me on mutation.mutation_event_id = me.mutation_event_id
LEFT JOIN sample_profile sp on mutation.sample_id = sp.sample_id and mutation.genetic_profile_id = sp.genetic_profile_id
LEFT JOIN gene_panel gp on sp.panel_id = gp.internal_id
LEFT JOIN genetic_profile g on sp.genetic_profile_id = g.genetic_profile_id
LEFT JOIN cancer_study cs on g.cancer_study_id = cs.cancer_study_id
LEFT JOIN sample on mutation.sample_id = sample.internal_id
LEFT JOIN gene on mutation.entrez_gene_id = gene.entrez_gene_id;
CREATE MATERIALIZED VIEW genomic_event_cna_mv TO genomic_event AS
SELECT
concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id,
toString(ce.alteration) as variant,
'cna' as variant_type,
gene.hugo_gene_symbol as hugo_gene_symbol,
gene_panel.stable_id as gene_panel_stable_id,
cs.cancer_study_identifier as cancer_study_identifier,
gp.stable_id as genetic_profile_stable_id
FROM sample_cna_event
LEFT JOIN cna_event ce on sample_cna_event.cna_event_id = ce.cna_event_id
LEFT JOIN gene on ce.entrez_gene_id = gene.entrez_gene_id
LEFT JOIN genetic_profile gp on sample_cna_event.genetic_profile_id = gp.genetic_profile_id
LEFT JOIN sample_profile sp on gp.genetic_profile_id = sp.genetic_profile_id
LEFT JOIN cancer_study cs on gp.cancer_study_id = cs.cancer_study_id
LEFT JOIN sample on sample_cna_event.sample_id = sample.internal_id
LEFT JOIN gene_panel on sp.panel_id = gene_panel.internal_id;
CREATE MATERIALIZED VIEW genomic_event_struct_var_mv TO genomic_event AS
SELECT
concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id,
hugo_gene_symbol,
event_info as variant,
g.stable_id as gene_panel_stable_id,
cs.cancer_study_identifier as cancer_study_identifier,
gp.stable_id as genetic_profile_stable_id
FROM structural_variant as sv
LEFT JOIN (select entrez_gene_id, hugo_gene_symbol as hugo_gene_symbol from gene) gene1 on gene1.entrez_gene_id = sv.site1_entrez_gene_id
LEFT OUTER JOIN genetic_profile gp on gp.genetic_profile_id = sv.genetic_profile_id
LEFT JOIN sample s on sv.sample_id = s.internal_id
LEFT JOIN cancer_study cs on gp.cancer_study_id = cs.cancer_study_id
LEFT JOIN sample_profile sp on gp.genetic_profile_id = sp.genetic_profile_id
LEFT JOIN gene_panel g on sp.panel_id = g.internal_id;