forked from shvechikov/pipelines
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pipeline.py
39 lines (33 loc) · 832 Bytes
/
pipeline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
from pipelines import tasks, Pipeline
# Project identifier, passed to the Pipeline constructor as `name`.
NAME: str = "test_project"
# Version label, passed to the Pipeline constructor as `version`; kept as a
# string rather than a number.
VERSION: str = "2023"
# Task list handed to the Pipeline constructor as `tasks`. The entries touch
# two tables, `original` and `norm`, and finish by dropping both of them.
TASKS = [
    # Optional up-front drops, left disabled (presumably for clearing tables
    # left behind by an interrupted run -- confirm before enabling):
    # tasks.RunSQL('drop table original;'),
    # tasks.RunSQL('drop table norm;'),

    # Bring the CSV file into the `original` table with columns name/url.
    tasks.LoadFile(
        input_file="example_pipeline/data/original.csv",
        table="original",
        columns=["name", "url"],
    ),

    # Build the `norm` table from a query over `original`.
    # NOTE(review): domain_of_url is not defined in this file; it is assumed
    # to be a SQL function available in the target database -- confirm.
    tasks.CTAS(
        table="norm",
        # Adjacent literals are joined at compile time, so the query text
        # keeps its leading and trailing newline.
        sql_query=(
            "\n"
            "select *, domain_of_url(url)\n"
            "from original\n"
        ),
    ),

    # Export the `norm` table to the output file.
    tasks.CopyToFile(
        table="norm",
        output_file="data/norm",
    ),

    # Clean up: remove both working tables.
    tasks.RunSQL("drop table original;"),
    tasks.RunSQL("drop table norm;"),
]
# Module-level Pipeline instance assembled from NAME, VERSION and TASKS; the
# __main__ guard at the bottom of this file calls its run() method.
pipeline = Pipeline(name=NAME, version=VERSION, tasks=TASKS)
# Two ways to execute this pipeline:
#   1. As a script: run this file directly, which triggers the guard below.
#   2. Via the CLI:
#        > pipelines run
if __name__ == '__main__':
    pipeline.run()
    # print("All good")