Skip to content

Commit

Permalink
feat: replicate crates data on bigquery
Browse files (browse the repository at this point in the history)
  • Loading branch information
Jabolol committed Dec 5, 2024
1 parent d985c53 commit a486fb4
Showing 1 changed file with 21 additions and 0 deletions.
21 changes: 21 additions & 0 deletions warehouse/oso_dagster/assets/crates.py
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,21 @@
from google.cloud.bigquery import SourceFormat
from ..constants import staging_bucket
from ..factories.archive2bq import Archive2BqAssetConfig, create_archive2bq_asset

# Asset for the crates.io database dump, built through the archive2bq factory.
# NOTE(review): the factory presumably stages the downloaded archive in
# `staging_bucket` and loads the selected files into the BigQuery dataset named
# by `dataset_id` -- confirm against factories/archive2bq.
crates_data = create_archive2bq_asset(
    Archive2BqAssetConfig(
        # Identity of the asset and its destination dataset.
        asset_name="crates",
        key_prefix="rust",
        dataset_id="crates",
        # Source archive; the filter accepts only names ending in ".csv".
        source_url="https://static.crates.io/db-dump.tar.gz",
        source_format=SourceFormat.CSV,
        filter_fn=lambda file: file.endswith(".csv"),
        # Override the type of the `id` column of `crates` to INTEGER.
        schema_overrides={"crates": {"id": "INTEGER"}},
        staging_bucket=staging_bucket,
        # No dependencies are declared for this asset.
        deps=[],
    )
)

0 comments on commit a486fb4

Please sign in to comment.