Skip to content

Commit

Permalink
Merge pull request #1561 from dlt-hub/devel
Browse files Browse the repository at this point in the history
master merge for 0.5.1 release
  • Loading branch information
rudolfix authored Jul 8, 2024
2 parents 41918a3 + 34e97cc commit d1e5666
Show file tree
Hide file tree
Showing 452 changed files with 20,770 additions and 6,814 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ jobs:

- name: Install dependencies
# if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
run: poetry install --all-extras --with airflow,providers,pipeline,sentry-sdk
run: poetry install --all-extras --with airflow,providers,pipeline,sentry-sdk,dbt

- name: Run make lint
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test_common.yml
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ jobs:
shell: cmd
- name: Install pipeline dependencies
run: poetry install --no-interaction -E duckdb -E cli -E parquet --with sentry-sdk --with pipeline
run: poetry install --no-interaction -E duckdb -E cli -E parquet --with sentry-sdk --with pipeline -E deltalake

- run: |
poetry run pytest tests/extract tests/pipeline tests/libs tests/cli/common tests/destinations
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test_dbt_runner.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ jobs:

- name: Install dependencies
# install dlt with postgres support
run: poetry install --no-interaction -E postgres -E dbt --with sentry-sdk
run: poetry install --no-interaction -E postgres --with sentry-sdk,dbt

- name: create secrets.toml
run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
Expand Down
81 changes: 81 additions & 0 deletions .github/workflows/test_destination_lancedb.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# CI workflow: runs the dlt load tests (tests/load) with LanceDB as the only
# active destination.
#
# Triggers: pull requests targeting master or devel, manual dispatch, and a
# daily schedule (cron '0 2 * * *').
name: dest | lancedb

on:
  pull_request:
    branches:
      - master
      - devel
  workflow_dispatch:
  schedule:
    - cron: '0 2 * * *'

# One active run per workflow and pull request (or ref when there is no PR);
# a newer run cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

env:
  DLT_SECRETS_TOML: ${{ secrets.DLT_SECRETS_TOML }}

  # NOTE(review): the "[email protected]" part of this DSN looks like an
  # e-mail-obfuscation artifact in this copy of the file - confirm the real
  # value before merging.
  RUNTIME__SENTRY_DSN: https://[email protected]/4504819859914752
  RUNTIME__LOG_LEVEL: ERROR
  RUNTIME__DLTHUB_TELEMETRY_ENDPOINT: ${{ secrets.RUNTIME__DLTHUB_TELEMETRY_ENDPOINT }}

  # JSON-encoded lists, passed to the test suite as plain strings.
  ACTIVE_DESTINATIONS: "[\"lancedb\"]"
  ALL_FILESYSTEM_DRIVERS: "[\"memory\"]"

jobs:
  # Reusable workflow call; its `changes_outside_docs` output gates run_loader.
  get_docs_changes:
    name: docs changes
    uses: ./.github/workflows/get_docs_changes.yml
    # Skip pull requests from forks unless they carry the 'ci from fork' label.
    if: ${{ !github.event.pull_request.head.repo.fork || contains(github.event.pull_request.labels.*.name, 'ci from fork')}}

  run_loader:
    name: dest | lancedb tests
    needs: get_docs_changes
    if: needs.get_docs_changes.outputs.changes_outside_docs == 'true'
    defaults:
      run:
        shell: bash
    runs-on: "ubuntu-latest"

    steps:
      - name: Check out
        uses: actions/checkout@master

      - name: Setup Python
        # The id is required: the cache key below reads
        # steps.setup-python.outputs.python-version, which resolves to an
        # empty string when no step carries this id.
        id: setup-python
        uses: actions/setup-python@v4
        with:
          python-version: "3.11.x"

      - name: Install Poetry
        # NOTE(review): "[email protected]" looks like an e-mail-obfuscation
        # artifact in this copy of the file - restore the pinned action
        # version before merging.
        uses: snok/[email protected]
        with:
          virtualenvs-create: true
          virtualenvs-in-project: true
          installer-parallel: true

      - name: Load cached venv
        id: cached-poetry-dependencies
        uses: actions/cache@v3
        with:
          path: .venv
          # Workflow-specific suffix so this venv (lancedb extras) is not
          # shared with caches of workflows that install other extras.
          key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-lancedb

      - name: create secrets.toml
        run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml

      - name: Install dependencies
        run: poetry install --no-interaction -E lancedb -E parquet --with sentry-sdk --with pipeline

      - name: Install embedding provider dependencies
        run: poetry run pip install openai

      # Exactly one of the two test steps below runs: the full suite for
      # scheduled runs and pull requests labelled 'ci full', otherwise only
      # the tests marked "essential".
      - name: Run essential tests Linux
        if: ${{ ! (contains(github.event.pull_request.labels.*.name, 'ci full') || github.event_name == 'schedule')}}
        run: |
          poetry run pytest tests/load -m "essential"

      - name: Run all tests Linux
        if: ${{ contains(github.event.pull_request.labels.*.name, 'ci full') || github.event_name == 'schedule'}}
        run: |
          poetry run pytest tests/load
3 changes: 2 additions & 1 deletion .github/workflows/test_destination_qdrant.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ jobs:
run_loader:
name: dest | qdrant tests
needs: get_docs_changes
if: needs.get_docs_changes.outputs.changes_outside_docs == 'true'
# if: needs.get_docs_changes.outputs.changes_outside_docs == 'true'
if: false # TODO re-enable with above line
defaults:
run:
shell: bash
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/test_destinations.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ env:
RUNTIME__DLTHUB_TELEMETRY_ENDPOINT: ${{ secrets.RUNTIME__DLTHUB_TELEMETRY_ENDPOINT }}
# Test redshift and filesystem with all buckets
# postgres runs again here so we can test on mac/windows
ACTIVE_DESTINATIONS: "[\"redshift\", \"postgres\", \"duckdb\", \"filesystem\", \"dummy\"]"
ACTIVE_DESTINATIONS: "[\"redshift\", \"postgres\", \"duckdb\", \"filesystem\", \"dummy\", \"motherduck\"]"

jobs:
get_docs_changes:
Expand Down Expand Up @@ -75,7 +75,7 @@ jobs:

- name: Install dependencies
# if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
run: poetry install --no-interaction -E redshift -E gs -E s3 -E az -E parquet -E duckdb -E cli --with sentry-sdk --with pipeline
run: poetry install --no-interaction -E redshift -E gs -E s3 -E az -E parquet -E duckdb -E cli --with sentry-sdk --with pipeline -E deltalake

- name: create secrets.toml
run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
Expand Down
24 changes: 23 additions & 1 deletion .github/workflows/test_doc_snippets.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ env:

# Slack hook for chess in production example
RUNTIME__SLACK_INCOMING_HOOK: ${{ secrets.RUNTIME__SLACK_INCOMING_HOOK }}
# Path to local qdrant database
DESTINATION__QDRANT__CREDENTIALS__PATH: zendesk.qdb
# detect if the workflow is executed in a repo fork
IS_FORK: ${{ github.event.pull_request.head.repo.fork }}

Expand All @@ -32,6 +34,26 @@ jobs:
# Do not run on forks, unless allowed, secrets are used here
if: ${{ !github.event.pull_request.head.repo.fork || contains(github.event.pull_request.labels.*.name, 'ci from fork')}}

# Service containers to run with `container-job`
services:
# Label used to access the service container
postgres:
# Docker Hub image
image: postgres
# Provide the password for postgres
env:
POSTGRES_DB: dlt_data
POSTGRES_USER: loader
POSTGRES_PASSWORD: loader
ports:
- 5432:5432
# Set health checks to wait until postgres has started
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
steps:

- name: Check out
Expand Down Expand Up @@ -61,7 +83,7 @@ jobs:

- name: Install dependencies
# if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
run: poetry install --no-interaction -E duckdb -E weaviate -E parquet -E qdrant -E bigquery -E postgres --with docs,sentry-sdk --without airflow
run: poetry install --no-interaction -E duckdb -E weaviate -E parquet -E qdrant -E bigquery -E postgres -E lancedb --with docs,sentry-sdk --without airflow

- name: create secrets.toml for examples
run: pwd && echo "$DLT_SECRETS_TOML" > docs/examples/.dlt/secrets.toml
Expand Down
10 changes: 8 additions & 2 deletions .github/workflows/test_local_destinations.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ env:
RUNTIME__SENTRY_DSN: https://[email protected]/4504819859914752
RUNTIME__LOG_LEVEL: ERROR
RUNTIME__DLTHUB_TELEMETRY_ENDPOINT: ${{ secrets.RUNTIME__DLTHUB_TELEMETRY_ENDPOINT }}
ACTIVE_DESTINATIONS: "[\"duckdb\", \"postgres\", \"filesystem\", \"weaviate\"]"
ACTIVE_DESTINATIONS: "[\"duckdb\", \"postgres\", \"filesystem\", \"weaviate\", \"qdrant\"]"
ALL_FILESYSTEM_DRIVERS: "[\"memory\", \"file\"]"

DESTINATION__WEAVIATE__VECTORIZER: text2vec-contextionary
Expand Down Expand Up @@ -63,6 +63,11 @@ jobs:
--health-timeout 5s
--health-retries 5
qdrant:
image: qdrant/qdrant:v1.8.4
ports:
- 6333:6333

steps:
- name: Check out
uses: actions/checkout@master
Expand Down Expand Up @@ -90,7 +95,7 @@ jobs:
key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-local-destinations

- name: Install dependencies
run: poetry install --no-interaction -E postgres -E duckdb -E parquet -E filesystem -E cli -E weaviate --with sentry-sdk --with pipeline
run: poetry install --no-interaction -E postgres -E duckdb -E parquet -E filesystem -E cli -E weaviate -E qdrant --with sentry-sdk --with pipeline -E deltalake

- name: create secrets.toml
run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
Expand All @@ -100,6 +105,7 @@ jobs:
name: Run tests Linux
env:
DESTINATION__POSTGRES__CREDENTIALS: postgresql://loader:loader@localhost:5432/dlt_data
DESTINATION__QDRANT__CREDENTIALS__location: http://localhost:6333

- name: Stop weaviate
if: always()
Expand Down
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ has-poetry:
poetry --version

dev: has-poetry
poetry install --all-extras --with airflow --with docs --with providers --with pipeline --with sentry-sdk
poetry install --all-extras --with airflow,docs,providers,pipeline,sentry-sdk,dbt

lint:
./tools/check-package.sh
Expand All @@ -67,9 +67,9 @@ lint-and-test-snippets:
cd docs/website/docs && poetry run pytest --ignore=node_modules

lint-and-test-examples:
poetry run mypy --config-file mypy.ini docs/examples
poetry run flake8 --max-line-length=200 docs/examples
cd docs/tools && poetry run python prepare_examples_tests.py
poetry run flake8 --max-line-length=200 docs/examples
poetry run mypy --config-file mypy.ini docs/examples
cd docs/examples && poetry run pytest


Expand Down
10 changes: 1 addition & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,12 @@ Be it a Google Colab notebook, AWS Lambda function, an Airflow DAG, your local l

dlt supports Python 3.8+.

**pip:**
```sh
pip install dlt
```

**pixi:**
```sh
pixi add dlt
```
More options: [Install via Conda or Pixi](https://dlthub.com/docs/reference/installation#install-dlt-via-pixi-and-conda)

**conda:**
```sh
conda install -c conda-forge dlt
```

## Quick Start

Expand Down
2 changes: 1 addition & 1 deletion dlt/cli/_dlt.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def schema_command_wrapper(file_path: str, format_: str, remove_defaults: bool)
schema_str = json.dumps(s.to_dict(remove_defaults=remove_defaults), pretty=True)
else:
schema_str = s.to_pretty_yaml(remove_defaults=remove_defaults)
print(schema_str)
fmt.echo(schema_str)
return 0


Expand Down
9 changes: 5 additions & 4 deletions dlt/cli/config_toml_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@
from tomlkit.container import Container as TOMLContainer
from collections.abc import Sequence as C_Sequence

from dlt.common.configuration.specs.base_configuration import is_hint_not_resolvable
from dlt.common.pendulum import pendulum
from dlt.common.configuration.specs import (
BaseConfiguration,
is_base_configuration_inner_hint,
extract_inner_hint,
)
from dlt.common.data_types import py_type_to_sc_type
from dlt.common.typing import AnyType, is_final_type, is_optional_type
from dlt.common.typing import AnyType, is_optional_type, is_subclass


class WritableConfigValue(NamedTuple):
Expand All @@ -34,7 +35,7 @@ def generate_typed_example(name: str, hint: AnyType) -> Any:
if sc_type == "bool":
return True
if sc_type == "complex":
if issubclass(inner_hint, C_Sequence):
if is_subclass(inner_hint, C_Sequence):
return ["a", "b", "c"]
else:
table = tomlkit.table(False)
Expand Down Expand Up @@ -62,9 +63,9 @@ def write_value(
# skip if table contains the name already
if name in toml_table and not overwrite_existing:
return
# do not dump final and optional fields if they are not of special interest
# do not dump non-resolvable and optional fields if they are not of special interest
if (
is_final_type(hint) or is_optional_type(hint) or default_value is not None
is_hint_not_resolvable(hint) or is_optional_type(hint) or default_value is not None
) and not is_default_of_interest:
return
# get the inner hint to generate cool examples
Expand Down
13 changes: 8 additions & 5 deletions dlt/cli/deploy_command_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,22 +263,25 @@ def parse_pipeline_info(visitor: PipelineScriptVisitor) -> List[Tuple[str, Optio
if n.PIPELINE in visitor.known_calls:
for call_args in visitor.known_calls[n.PIPELINE]:
pipeline_name, pipelines_dir = None, None
f_r_node = call_args.arguments.get("full_refresh")
# Check both full_refresh/dev_mode until full_refresh option is removed from dlt
f_r_node = call_args.arguments.get("full_refresh") or call_args.arguments.get(
"dev_mode"
)
if f_r_node:
f_r_value = evaluate_node_literal(f_r_node)
if f_r_value is None:
fmt.warning(
"The value of `full_refresh` in call to `dlt.pipeline` cannot be"
"The value of `dev_mode` in call to `dlt.pipeline` cannot be"
f" determined from {unparse(f_r_node).strip()}. We assume that you know"
" what you are doing :)"
)
if f_r_value is True:
if fmt.confirm(
"The value of 'full_refresh' is set to True. Do you want to abort to set it"
" to False?",
"The value of 'dev_mode' or 'full_refresh' is set to True. Do you want to"
" abort to set it to False?",
default=True,
):
raise CliCommandException("deploy", "Please set the full_refresh to False")
raise CliCommandException("deploy", "Please set the dev_mode to False")

p_d_node = call_args.arguments.get("pipelines_dir")
if p_d_node:
Expand Down
Loading

0 comments on commit d1e5666

Please sign in to comment.