-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Migrated metrics_v0 and timeseries_metrics_by_artifact_v0 to sqlmesh (#…
…2089) * Move metrics_v0 to sqlmesh * Works on clickhouse
- Loading branch information
Showing
11 changed files
with
208 additions
and
145 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
from sqlmesh import macro | ||
from sqlmesh.core.macros import MacroEvaluator | ||
from sqlglot import expressions as exp | ||
|
||
|
||
@macro()
def oso_id(_evaluator: MacroEvaluator, *args: exp.Expression):
    """Render ``SHA256(CONCAT(...))`` over *args* to derive a deterministic id.

    Uses a NULL-safe concat (``safe=True``) so the expression stays valid when
    a component may be NULL, without coalescing components to empty strings.
    """
    return exp.SHA2(
        # sqlglot expects a list for `expressions`; *args arrives as a tuple.
        this=exp.Concat(expressions=list(args), safe=True, coalesce=False),
        # Literal.number is the canonical constructor — sqlglot literals store
        # their value as a string, so passing a raw int via `this` is off-spec.
        length=exp.Literal.number(256),
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,109 +1,109 @@ | ||
""" | ||
A source macro that can be used for rewriting a source reference at runtime. | ||
This mimics the sources behavior in dbt except that the source and destination | ||
of the rewrite is infinitely flexible. | ||
""" | ||
|
||
from typing import Optional, Dict, List | ||
import os | ||
import glob | ||
import yaml | ||
from sqlmesh import macro | ||
from sqlmesh.core.macros import MacroEvaluator | ||
from sqlglot import to_table | ||
from pydantic import BaseModel, model_validator | ||
|
||
CURR_DIR = os.path.abspath(os.path.dirname(__file__)) | ||
SOURCE_YAML_DIR = os.path.abspath(os.path.join(CURR_DIR, "../sources")) | ||
SOURCE_YAML_GLOB = os.path.join(SOURCE_YAML_DIR, "*.yml") | ||
|
||
|
||
class TableReference(BaseModel):
    """One source table entry declared in a sources YAML file."""

    # Logical name used to look the table up via the @source macro.
    name: str
    # Optional catalog (database) qualifier; omitted from the rendered
    # reference when None.
    catalog: Optional[str] = None
    # Physical table name; defaults to ``name`` (see validator below).
    table_name: Optional[str] = None
    schema_name: str

    @model_validator(mode="after")
    def ensure_table_name(self):
        # Fall back to the logical name when no explicit table name is given.
        if self.table_name is None:
            self.table_name = self.name
        return self
|
||
|
||
class SourcesFile(BaseModel):
    """Parsed contents of one sources YAML file (one gateway per file)."""

    gateway: str
    # Source group name -> tables declared under that group.
    sources: Dict[str, List[TableReference]]


# gateway -> source group -> table name -> TableReference
EnvSourceMap = Dict[str, Dict[str, Dict[str, TableReference]]]
|
||
|
||
def read_yaml_files(glob_pattern: str) -> List[SourcesFile]:
    """Parse every sources YAML file matching *glob_pattern*.

    Returns one validated ``SourcesFile`` per readable YAML file; files that
    fail to parse are reported to stdout and skipped (best-effort loading).
    """
    # Something about the multithread/processing of sqlmesh probably interferes
    # with something in pydantic. This is a hack for now to get this working,
    # but we should likely just use a typed dict or a simple dataclass to do
    # this validation.
    # NOTE(review): the local classes below deliberately shadow the
    # module-level TableReference/SourcesFile — see the hack note above.
    from typing import Optional, Dict, List

    class TableReference(BaseModel):
        name: str
        catalog: Optional[str] = None
        table_name: Optional[str] = None
        schema_name: str

        @model_validator(mode="after")
        def ensure_table_name(self):
            # Default the physical table name to the logical name.
            if self.table_name is None:
                self.table_name = self.name
            return self

    class SourcesFile(BaseModel):
        gateway: str
        sources: Dict[str, List[TableReference]]

    SourcesFile.model_rebuild()
    sources_files: List[SourcesFile] = []
    # Find all files matching the glob pattern
    for file_name in glob.glob(glob_pattern):
        if os.path.isfile(file_name):
            with open(file_name, "r") as file:
                try:
                    data = yaml.safe_load(file)
                    sources_files.append(SourcesFile.model_validate(data))
                except yaml.YAMLError as exc:
                    # Best-effort: report and continue with remaining files.
                    print(f"Error parsing {file_name}: {exc}")
    return sources_files
|
||
|
||
def generate_source_map(parsed_sources_files: List[SourcesFile]) -> EnvSourceMap:
    """Index parsed sources files as gateway -> group -> table name -> ref.

    Later files merge into earlier ones; a table annotated more than once
    under the same gateway/group keeps the last definition and emits a
    warning to stdout.
    """
    env_source_map: EnvSourceMap = {}
    for parsed in parsed_sources_files:
        gateway_map = env_source_map.setdefault(parsed.gateway, {})
        for group, table_refs in parsed.sources.items():
            group_map = gateway_map.setdefault(group, {})
            for table_ref in table_refs:
                if table_ref.name in group_map:
                    print("WARNING: table annotated multiple times")
                group_map[table_ref.name] = table_ref
    return env_source_map
|
||
|
||
@macro()
def source(evaluator: MacroEvaluator, ref: str, table: str):
    """Allows us to change the location of a source when the gateway changes."""
    env_map = generate_source_map(read_yaml_files(SOURCE_YAML_GLOB))

    gateway = evaluator.gateway
    if not gateway:
        return ""
    target = env_map[gateway][ref][table]
    # Build the fully-quoted dotted reference, including the catalog
    # qualifier only when one is configured for this table.
    parts = [target.schema_name, target.table_name]
    if target.catalog:
        parts.insert(0, target.catalog)
    return to_table(".".join(f'"{part}"' for part in parts))
# """ | ||
# A source macro that can be used for rewriting a source reference at runtime. | ||
|
||
# This mimics the sources behavior in dbt except that the source and destination | ||
# of the rewrite is infinitely flexible. | ||
# """ | ||
|
||
# from typing import Optional, Dict, List | ||
# import os | ||
# import glob | ||
# import yaml | ||
# from sqlmesh import macro | ||
# from sqlmesh.core.macros import MacroEvaluator | ||
# from sqlglot import to_table | ||
# from pydantic import BaseModel, model_validator | ||
|
||
# CURR_DIR = os.path.abspath(os.path.dirname(__file__)) | ||
# SOURCE_YAML_DIR = os.path.abspath(os.path.join(CURR_DIR, "../sources")) | ||
# SOURCE_YAML_GLOB = os.path.join(SOURCE_YAML_DIR, "*.yml") | ||
|
||
|
||
# class TableReference(BaseModel): | ||
# name: str | ||
# catalog: Optional[str] = None | ||
# table_name: Optional[str] = None | ||
# schema_name: str | ||
|
||
# @model_validator(mode="after") | ||
# def ensure_table_name(self): | ||
# if self.table_name is None: | ||
# self.table_name = self.name | ||
# return self | ||
|
||
|
||
# class SourcesFile(BaseModel): | ||
# gateway: str | ||
# sources: Dict[str, List[TableReference]] | ||
|
||
|
||
# EnvSourceMap = Dict[str, Dict[str, Dict[str, TableReference]]] | ||
|
||
|
||
# def read_yaml_files(glob_pattern) -> List[SourcesFile]: | ||
# # Something about the multithread/processing of sqlmesh probably interferes | ||
# # with something in pydantic. This is a hack for now to get this working, | ||
# # but we should likely just use a typed dict or a simple dataclass to do | ||
# # this validation. | ||
# from typing import Optional, Dict, List | ||
|
||
# class TableReference(BaseModel): | ||
# name: str | ||
# catalog: Optional[str] = None | ||
# table_name: Optional[str] = None | ||
# schema_name: str | ||
|
||
# @model_validator(mode="after") | ||
# def ensure_table_name(self): | ||
# if self.table_name is None: | ||
# self.table_name = self.name | ||
# return self | ||
|
||
# class SourcesFile(BaseModel): | ||
# gateway: str | ||
# sources: Dict[str, List[TableReference]] | ||
|
||
# SourcesFile.model_rebuild() | ||
# sources_files: List[SourcesFile] = [] | ||
# # Find all files matching the glob pattern | ||
# for file_name in glob.glob(glob_pattern): | ||
# if os.path.isfile(file_name): | ||
# with open(file_name, "r") as file: | ||
# try: | ||
# data = yaml.safe_load(file) | ||
# sources_files.append(SourcesFile.model_validate(data)) | ||
# except yaml.YAMLError as exc: | ||
# print(f"Error parsing {file_name}: {exc}") | ||
# return sources_files | ||
|
||
|
||
# def generate_source_map(parsed_sources_files: List[SourcesFile]) -> EnvSourceMap: | ||
# env_source_map: EnvSourceMap = {} | ||
# for sources_file in parsed_sources_files: | ||
# if sources_file.gateway not in env_source_map: | ||
# env_source_map[sources_file.gateway] = {} | ||
# source_map = env_source_map[sources_file.gateway] | ||
# for key, table_refs in sources_file.sources.items(): | ||
# if key not in source_map: | ||
# source_map[key] = {} | ||
# for table_ref in table_refs: | ||
# if table_ref.name in source_map[key]: | ||
# print("WARNING: table annotated multiple times") | ||
# source_map[key][table_ref.name] = table_ref | ||
# return env_source_map | ||
|
||
|
||
# @macro() | ||
# def source(evaluator: MacroEvaluator, ref: str, table: str): | ||
# """Allows us to change the location of a source when the gateway changes.""" | ||
# source_map = generate_source_map(read_yaml_files(SOURCE_YAML_GLOB)) | ||
|
||
# gateway = evaluator.gateway | ||
# if not gateway: | ||
# return "" | ||
# table_ref = source_map[gateway][ref][table] | ||
# if not table_ref.catalog: | ||
# return to_table(f'"{table_ref.schema_name}"."{table_ref.table_name}"') | ||
# return to_table( | ||
# f'"{table_ref.catalog}"."{table_ref.schema_name}"."{table_ref.table_name}"' | ||
# ) |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
MODEL (
  name metrics.metrics_v0,
  kind FULL,
  dialect "clickhouse"
);
-- Build the metric registry from the distinct metric names observed in the
-- 30-day artifact timeseries table.
WITH all_timeseries_metric_names AS (
  SELECT DISTINCT
    metric
  FROM metrics.timeseries_metrics_by_artifact_over_30_days
),
metrics_v0_no_casting AS (
  SELECT
    -- Deterministic id via the @oso_id macro: SHA256 of source/namespace/name.
    @oso_id('OSO', 'oso', metric) AS metric_id,
    'OSO' AS metric_source,
    'oso' AS metric_namespace,
    metric AS metric_name,
    metric AS display_name,
    -- Placeholder values pending real metadata for these columns.
    'TODO' AS description,
    NULL AS raw_definition,
    'TODO' AS definition_ref,
    'UNKNOWN' AS aggregation_function
  FROM all_timeseries_metric_names
)
-- Final projection applies explicit ClickHouse types.
-- NOTE(review): metric_namespace is computed in the CTE above but not
-- selected here — confirm whether the v0 schema intentionally omits it.
select
  metric_id::String,
  metric_source::String,
  metric_name::String,
  display_name::String,
  description::Nullable(String),
  raw_definition::Nullable(String),
  definition_ref::Nullable(String),
  aggregation_function::Nullable(String)
FROM metrics_v0_no_casting
Oops, something went wrong.