From 33876993536bfa67c1b5278345823f9ba0b28095 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20R=C3=ADos?= Date: Wed, 11 Sep 2024 14:23:41 +0200 Subject: [PATCH] add: dagster `quickstart` setup to docs (#2129) * fix: order `sidebar` position * add: dagster `local` setup quickstart --- apps/docs/docs/contribute/connect-data/api.md | 2 +- .../docs/contribute/connect-data/dagster.md | 2 +- .../docs/contribute/connect-data/database.md | 2 +- apps/docs/docs/contribute/connect-data/gcs.md | 2 +- .../contribute/connect-data/quickstart.md | 86 +++++++++++++++++++ 5 files changed, 90 insertions(+), 4 deletions(-) create mode 100644 apps/docs/docs/contribute/connect-data/quickstart.md diff --git a/apps/docs/docs/contribute/connect-data/api.md b/apps/docs/docs/contribute/connect-data/api.md index b49baff24..0071a2814 100644 --- a/apps/docs/docs/contribute/connect-data/api.md +++ b/apps/docs/docs/contribute/connect-data/api.md @@ -1,6 +1,6 @@ --- title: Crawl an API -sidebar_position: 3 +sidebar_position: 4 --- import NextSteps from "./dagster-config.mdx" diff --git a/apps/docs/docs/contribute/connect-data/dagster.md b/apps/docs/docs/contribute/connect-data/dagster.md index 44c36d622..75ff21b4c 100644 --- a/apps/docs/docs/contribute/connect-data/dagster.md +++ b/apps/docs/docs/contribute/connect-data/dagster.md @@ -1,6 +1,6 @@ --- title: Writing Custom Dagster Assets -sidebar_position: 5 +sidebar_position: 6 --- import NextSteps from "./dagster-config.mdx" diff --git a/apps/docs/docs/contribute/connect-data/database.md b/apps/docs/docs/contribute/connect-data/database.md index b3064f898..a6b10af01 100644 --- a/apps/docs/docs/contribute/connect-data/database.md +++ b/apps/docs/docs/contribute/connect-data/database.md @@ -1,6 +1,6 @@ --- title: Replicate a Database -sidebar_position: 2 +sidebar_position: 3 --- import NextSteps from "./dagster-config.mdx" diff --git a/apps/docs/docs/contribute/connect-data/gcs.md b/apps/docs/docs/contribute/connect-data/gcs.md index 
d5fae11da..b256c3bf9 100644 --- a/apps/docs/docs/contribute/connect-data/gcs.md +++ b/apps/docs/docs/contribute/connect-data/gcs.md @@ -1,6 +1,6 @@ --- title: Connect via Google Cloud Storage (GCS) -sidebar_position: 4 +sidebar_position: 5 --- import NextSteps from "./dagster-config.mdx" diff --git a/apps/docs/docs/contribute/connect-data/quickstart.md b/apps/docs/docs/contribute/connect-data/quickstart.md new file mode 100644 index 000000000..0fb130a1a --- /dev/null +++ b/apps/docs/docs/contribute/connect-data/quickstart.md @@ -0,0 +1,86 @@ +--- +title: Dagster Quickstart +sidebar_position: 2 +--- + +# Dagster quickstart + +[Dagster](https://dagster.io) is a data orchestrator that allows you to define +data pipelines in a declarative way. It is a powerful tool that allows you to +define the flow of data from source to destination, and to define the +transformations that data undergoes along the way. + +At OSO, we use Dagster to process data from +[various sources](https://github.com/opensource-observer/oso/tree/main/warehouse/oso_dagster/assets), +transform it, and load it into BigQuery. This quickstart guide will help you set +up our Dagster instance locally, with a [`duckdb`](http://duckdb.org/) backend, +in order to follow along with our tutorials in the next sections. + +## Setting up Dagster + +First, we need to clone the +[OSO GitHub repository](http://github.com/opensource-observer/oso) and install +the required dependencies. + +```sh +git clone git@github.com:opensource-observer/oso.git . +``` + +Install the dependencies and create a virtual environment with +[poetry](https://python-poetry.org): + +```sh +poetry install && poetry shell +``` + +Now, let's fill the `.env` file with the required environment variables: + +```sh +GOOGLE_PROJECT_ID= +DAGSTER_DBT_PARSE_PROJECT_ON_LOAD=1 +DAGSTER_HOME=/tmp/dagster-home +``` + +After setting the environment variables, Dagster needs `$DAGSTER_HOME` to be +created before running the Dagster instance. 
+ +```sh +mkdir /tmp/dagster-home +``` + +:::info + +Lastly, we need to configure `dagster.yaml` to disable concurrency. Our example +is located at `/tmp/dagster-home/dagster.yaml`. + +This is currently a limitation with our `duckdb` integration. Please check out +[this issue](https://github.com/opensource-observer/oso/issues/2040#issue-2503231601) +for more information. + +```yaml +run_queue: + max_concurrent_runs: 1 +``` + +::: + +## Running Dagster + +Now that we have everything set up, we can run the Dagster instance: + +```sh +dagster dev +``` + +After a little bit of time, you should see the following message: + +```sh +2024-09-10 22:35:31 +0200 - dagster.daemon - INFO - Instance is configured with the following daemons: ['AssetDaemon', 'BackfillDaemon', 'QueuedRunCoordinatorDaemon', 'SchedulerDaemon', 'SensorDaemon'] +2024-09-10 22:35:31 +0200 - dagster-webserver - INFO - Serving dagster-webserver on http://127.0.0.1:3000 in process 1095 +``` + +Head over to [http://localhost:3000](http://localhost:3000) to access Dagster's +UI. _Et voilà_! You have successfully set up Dagster locally. + +This is just the beginning. Check out how to create a +[DLT Dagster Asset](./api.md#create-dlt-dagster-assets) next and start building!