Skip to content

Commit

Permalink
Merge pull request #1278 from dlt-hub/devel
Browse files Browse the repository at this point in the history
master merge for 0.4.9 release
  • Loading branch information
rudolfix authored Apr 25, 2024
2 parents 0f5c462 + a529924 commit efaedc2
Show file tree
Hide file tree
Showing 210 changed files with 6,973 additions and 1,649 deletions.
1 change: 1 addition & 0 deletions .github/workflows/test_common.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ concurrency:

env:
RUNTIME__LOG_LEVEL: ERROR
RUNTIME__DLTHUB_TELEMETRY_ENDPOINT: ${{ secrets.RUNTIME__DLTHUB_TELEMETRY_ENDPOINT }}

jobs:
get_docs_changes:
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/test_dbt_cloud.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ env:
DBT_CLOUD__API_TOKEN: ${{ secrets.DBT_CLOUD__API_TOKEN }}

RUNTIME__LOG_LEVEL: ERROR
RUNTIME__DLTHUB_TELEMETRY_ENDPOINT: ${{ secrets.RUNTIME__DLTHUB_TELEMETRY_ENDPOINT }}

jobs:
get_docs_changes:
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/test_dbt_runner.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ env:

DLT_SECRETS_TOML: ${{ secrets.DLT_SECRETS_TOML }}
RUNTIME__LOG_LEVEL: ERROR
RUNTIME__DLTHUB_TELEMETRY_ENDPOINT: ${{ secrets.RUNTIME__DLTHUB_TELEMETRY_ENDPOINT }}

jobs:
get_docs_changes:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test_destination_athena.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ env:

RUNTIME__SENTRY_DSN: https://[email protected]/4504819859914752
RUNTIME__LOG_LEVEL: ERROR
RUNTIME__DLTHUB_TELEMETRY_SEGMENT_WRITE_KEY: TLJiyRkGVZGCi2TtjClamXpFcxAA1rSB
RUNTIME__DLTHUB_TELEMETRY_ENDPOINT: ${{ secrets.RUNTIME__DLTHUB_TELEMETRY_ENDPOINT }}
ACTIVE_DESTINATIONS: "[\"athena\"]"
ALL_FILESYSTEM_DRIVERS: "[\"memory\"]"
EXCLUDED_DESTINATION_CONFIGURATIONS: "[\"athena-parquet-staging-iceberg\", \"athena-parquet-no-staging-iceberg\"]"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test_destination_athena_iceberg.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ env:

RUNTIME__SENTRY_DSN: https://[email protected]/4504819859914752
RUNTIME__LOG_LEVEL: ERROR
RUNTIME__DLTHUB_TELEMETRY_SEGMENT_WRITE_KEY: TLJiyRkGVZGCi2TtjClamXpFcxAA1rSB
RUNTIME__DLTHUB_TELEMETRY_ENDPOINT: ${{ secrets.RUNTIME__DLTHUB_TELEMETRY_ENDPOINT }}
ACTIVE_DESTINATIONS: "[\"athena\"]"
ALL_FILESYSTEM_DRIVERS: "[\"memory\"]"
EXCLUDED_DESTINATION_CONFIGURATIONS: "[\"athena-no-staging\", \"athena-parquet-no-staging\"]"
Expand Down
7 changes: 1 addition & 6 deletions .github/workflows/test_destination_bigquery.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ env:

RUNTIME__SENTRY_DSN: https://[email protected]/4504819859914752
RUNTIME__LOG_LEVEL: ERROR
RUNTIME__DLTHUB_TELEMETRY_ENDPOINT: ${{ secrets.RUNTIME__DLTHUB_TELEMETRY_ENDPOINT }}

ACTIVE_DESTINATIONS: "[\"bigquery\"]"
ALL_FILESYSTEM_DRIVERS: "[\"memory\"]"
Expand Down Expand Up @@ -70,12 +71,6 @@ jobs:
- name: create secrets.toml
run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml

- run: |
poetry run pytest tests/load -m "essential"
name: Run essential tests Linux
if: ${{ ! (contains(github.event.pull_request.labels.*.name, 'ci full') || github.event_name == 'schedule')}}
- run: |
poetry run pytest tests/load
name: Run all tests Linux
if: ${{ contains(github.event.pull_request.labels.*.name, 'ci full') || github.event_name == 'schedule'}}
1 change: 1 addition & 0 deletions .github/workflows/test_destination_databricks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ env:

RUNTIME__SENTRY_DSN: https://[email protected]/4504819859914752
RUNTIME__LOG_LEVEL: ERROR
RUNTIME__DLTHUB_TELEMETRY_ENDPOINT: ${{ secrets.RUNTIME__DLTHUB_TELEMETRY_ENDPOINT }}

ACTIVE_DESTINATIONS: "[\"databricks\"]"
ALL_FILESYSTEM_DRIVERS: "[\"memory\"]"
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/test_destination_dremio.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ concurrency:
env:
RUNTIME__SENTRY_DSN: https://[email protected]/4504819859914752
RUNTIME__LOG_LEVEL: ERROR
RUNTIME__DLTHUB_TELEMETRY_ENDPOINT: ${{ secrets.RUNTIME__DLTHUB_TELEMETRY_ENDPOINT }}

ACTIVE_DESTINATIONS: "[\"dremio\"]"
ALL_FILESYSTEM_DRIVERS: "[\"memory\"]"
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/test_destination_mssql.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ env:

RUNTIME__SENTRY_DSN: https://[email protected]/4504819859914752
RUNTIME__LOG_LEVEL: ERROR
RUNTIME__DLTHUB_TELEMETRY_ENDPOINT: ${{ secrets.RUNTIME__DLTHUB_TELEMETRY_ENDPOINT }}

ACTIVE_DESTINATIONS: "[\"mssql\"]"
ALL_FILESYSTEM_DRIVERS: "[\"memory\"]"
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/test_destination_qdrant.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ env:

RUNTIME__SENTRY_DSN: https://[email protected]/4504819859914752
RUNTIME__LOG_LEVEL: ERROR
RUNTIME__DLTHUB_TELEMETRY_ENDPOINT: ${{ secrets.RUNTIME__DLTHUB_TELEMETRY_ENDPOINT }}

ACTIVE_DESTINATIONS: "[\"qdrant\"]"
ALL_FILESYSTEM_DRIVERS: "[\"memory\"]"
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/test_destination_snowflake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ env:

RUNTIME__SENTRY_DSN: https://[email protected]/4504819859914752
RUNTIME__LOG_LEVEL: ERROR
RUNTIME__DLTHUB_TELEMETRY_ENDPOINT: ${{ secrets.RUNTIME__DLTHUB_TELEMETRY_ENDPOINT }}

ACTIVE_DESTINATIONS: "[\"snowflake\"]"
ALL_FILESYSTEM_DRIVERS: "[\"memory\"]"
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/test_destination_synapse.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ env:

RUNTIME__SENTRY_DSN: https://cf6086f7d263462088b9fb9f9947caee@o4505514867163136.ingest.sentry.io/4505516212682752
RUNTIME__LOG_LEVEL: ERROR
RUNTIME__DLTHUB_TELEMETRY_ENDPOINT: ${{ secrets.RUNTIME__DLTHUB_TELEMETRY_ENDPOINT }}

ACTIVE_DESTINATIONS: "[\"synapse\"]"
ALL_FILESYSTEM_DRIVERS: "[\"memory\"]"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test_destinations.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ env:

RUNTIME__SENTRY_DSN: https://[email protected]/4504819859914752
RUNTIME__LOG_LEVEL: ERROR
RUNTIME__DLTHUB_TELEMETRY_SEGMENT_WRITE_KEY: TLJiyRkGVZGCi2TtjClamXpFcxAA1rSB
RUNTIME__DLTHUB_TELEMETRY_ENDPOINT: ${{ secrets.RUNTIME__DLTHUB_TELEMETRY_ENDPOINT }}
# Test redshift and filesystem with all buckets
# postgres runs again here so we can test on mac/windows
ACTIVE_DESTINATIONS: "[\"redshift\", \"postgres\", \"duckdb\", \"filesystem\", \"dummy\"]"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test_doc_snippets.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ env:

RUNTIME__SENTRY_DSN: https://[email protected]/4504819859914752
RUNTIME__LOG_LEVEL: ERROR
RUNTIME__DLTHUB_TELEMETRY_SEGMENT_WRITE_KEY: TLJiyRkGVZGCi2TtjClamXpFcxAA1rSB
RUNTIME__DLTHUB_TELEMETRY_ENDPOINT: ${{ secrets.RUNTIME__DLTHUB_TELEMETRY_ENDPOINT }}

# Slack hook for chess in production example
RUNTIME__SLACK_INCOMING_HOOK: ${{ secrets.RUNTIME__SLACK_INCOMING_HOOK }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test_local_destinations.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ env:

RUNTIME__SENTRY_DSN: https://[email protected]/4504819859914752
RUNTIME__LOG_LEVEL: ERROR
RUNTIME__DLTHUB_TELEMETRY_SEGMENT_WRITE_KEY: TLJiyRkGVZGCi2TtjClamXpFcxAA1rSB
RUNTIME__DLTHUB_TELEMETRY_ENDPOINT: ${{ secrets.RUNTIME__DLTHUB_TELEMETRY_ENDPOINT }}
ACTIVE_DESTINATIONS: "[\"duckdb\", \"postgres\", \"filesystem\", \"weaviate\"]"
ALL_FILESYSTEM_DRIVERS: "[\"memory\", \"file\"]"

Expand Down
31 changes: 26 additions & 5 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@ Thank you for considering contributing to **dlt**! We appreciate your help in ma

1. [Getting Started](#getting-started)
2. [Submitting Changes](#submitting-changes)
3. [Linting](#linting)
4. [Testing](#testing)
5. [Local Development](#local-development)
6. [Publishing (Maintainers Only)](#publishing-maintainers-only)
7. [Resources](#resources)
3. [Adding or updating core dependencies](#adding-or-updating-core-dependencies)
4. [Linting](#linting)
5. [Testing](#testing)
6. [Local Development](#local-development)
7. [Publishing (Maintainers Only)](#publishing-maintainers-only)
8. [Resources](#resources)

## Before You Begin

Expand Down Expand Up @@ -62,6 +63,26 @@ only the `duckdb` and `postgres` are available to forks.

If you submit a new destination or make changes to a destination that requires credentials (e.g. BigQuery, Snowflake, buckets, etc.), you **should contact us so we can add you as a contributor**. Then you should make a PR directly to the `dlt` repo.

## Adding or updating core dependencies

Our objective is to maintain stability and compatibility of dlt across all environments.
By following these guidelines, we can make sure that dlt stays secure, reliable and compatible.
Please consider the following points carefully when proposing updates to dependencies.

### Updating guidelines

1. **Critical security or system integrity updates only:**
Major or minor version updates to dependencies should only be considered if there are critical security vulnerabilities or issues that impact the system's integrity. In such cases, updating is necessary to protect the system and the data it processes.

2. **Using the '>=' operator:**
When specifying dependencies, please make sure to use the `>=` operator while also maintaining version minima. This approach ensures our project remains compatible with older systems and setups, mitigating potentially unresolvable dependency conflicts.

For example, if our project currently uses a package `example-package==1.2.3`, and a security update is
released as `1.2.4`, instead of updating to `example-package==1.2.4`, we can set it to `example-package>=1.2.3,<2.0.0`. This permits the necessary security update and at the same time
prevents the automatic jump to a potentially incompatible major version update in the future.
The other important reason to keep version minima as low as possible is to prevent cases where package
versions cannot be resolved.

## Linting

`dlt` uses `mypy` and `flake8` with several plugins for linting.
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,4 +113,4 @@ The dlt project is quickly growing, and we're excited to have you join our commu

## License

DLT is released under the [Apache 2.0 License](LICENSE.txt).
`dlt` is released under the [Apache 2.0 License](LICENSE.txt).
2 changes: 1 addition & 1 deletion dlt/cli/_dlt.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import click

from dlt.version import __version__
from dlt.common import json
from dlt.common.json import json
from dlt.common.schema import Schema
from dlt.common.typing import DictStrAny
from dlt.common.runners import Venv
Expand Down
2 changes: 1 addition & 1 deletion dlt/cli/config_toml_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from tomlkit.container import Container as TOMLContainer
from collections.abc import Sequence as C_Sequence

from dlt.common import pendulum
from dlt.common.pendulum import pendulum
from dlt.common.configuration.specs import (
BaseConfiguration,
is_base_configuration_inner_hint,
Expand Down
2 changes: 1 addition & 1 deletion dlt/cli/pipeline_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import dlt
from dlt.cli.exceptions import CliCommandException

from dlt.common import json
from dlt.common.json import json
from dlt.common.pipeline import resource_state, get_dlt_pipelines_dir, TSourceState
from dlt.common.destination.reference import TDestinationReferenceArg
from dlt.common.runners import Venv
Expand Down
3 changes: 0 additions & 3 deletions dlt/cli/utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
import ast
import os
import tempfile
from typing import Callable

from dlt.common import git
from dlt.common.reflection.utils import set_ast_parents
from dlt.common.storages import FileStorage
from dlt.common.typing import TFun
from dlt.common.configuration import resolve_configuration
from dlt.common.configuration.specs import RunConfiguration
Expand Down
3 changes: 1 addition & 2 deletions dlt/common/configuration/providers/google_secrets.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import base64
import string
import re
from typing import Tuple

from dlt.common import json
from dlt.common.json import json
from dlt.common.configuration.specs import GcpServiceAccountCredentials
from dlt.common.exceptions import MissingDependencyException
from .toml import VaultTomlProvider
Expand Down
2 changes: 1 addition & 1 deletion dlt/common/configuration/providers/toml.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from tomlkit.container import Container as TOMLContainer
from typing import Any, Dict, Optional, Tuple, Type, Union

from dlt.common import pendulum
from dlt.common.pendulum import pendulum
from dlt.common.configuration.paths import get_dlt_settings_dir, get_dlt_data_dir
from dlt.common.configuration.utils import auto_cast
from dlt.common.configuration.specs import known_sections
Expand Down
3 changes: 1 addition & 2 deletions dlt/common/configuration/specs/azure_credentials.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from typing import Optional, Dict, Any

from dlt.common import pendulum
from dlt.common.exceptions import MissingDependencyException
from dlt.common.pendulum import pendulum
from dlt.common.typing import TSecretStrValue
from dlt.common.configuration.specs import (
CredentialsConfiguration,
Expand Down
3 changes: 2 additions & 1 deletion dlt/common/configuration/specs/gcp_credentials.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
import sys
from typing import Any, ClassVar, Final, List, Tuple, Union, Dict

from dlt.common import json, pendulum
from dlt.common.json import json
from dlt.common.pendulum import pendulum
from dlt.common.configuration.specs.api_credentials import OAuth2Credentials
from dlt.common.configuration.specs.exceptions import (
InvalidGoogleNativeCredentialsType,
Expand Down
1 change: 1 addition & 0 deletions dlt/common/configuration/specs/run_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ class RunConfiguration(BaseConfiguration):
slack_incoming_hook: Optional[TSecretStrValue] = None
dlthub_telemetry: bool = True # enable or disable dlthub telemetry
dlthub_telemetry_segment_write_key: str = "a1F2gc6cNYw2plyAt02sZouZcsRjG7TD"
dlthub_telemetry_endpoint: str = "https://api.segment.io/v1/track"
log_format: str = "{asctime}|[{levelname:<21}]|{process}|{thread}|{name}|{filename}|{funcName}:{lineno}|{message}"
log_level: str = "WARNING"
request_timeout: float = 60
Expand Down
4 changes: 1 addition & 3 deletions dlt/common/configuration/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from typing import Any, Dict, Mapping, NamedTuple, Optional, Tuple, Type, Sequence
from collections.abc import Mapping as C_Mapping

from dlt.common import json
from dlt.common.json import json
from dlt.common.typing import AnyType, TAny
from dlt.common.data_types import coerce_value, py_type_to_sc_type
from dlt.common.configuration.providers import EnvironProvider
Expand Down Expand Up @@ -122,8 +122,6 @@ def log_traces(
default_value: Any,
traces: Sequence[LookupTrace],
) -> None:
from dlt.common import logger

# if logger.is_logging() and logger.log_level() == "DEBUG" and config:
# logger.debug(f"Field {key} with type {hint} in {type(config).__name__} {'NOT RESOLVED' if value is None else 'RESOLVED'}")
# print(f"Field {key} with type {hint} in {type(config).__name__} {'NOT RESOLVED' if value is None else 'RESOLVED'}")
Expand Down
6 changes: 3 additions & 3 deletions dlt/common/data_types/type_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
import dataclasses
import datetime # noqa: I251
from collections.abc import Mapping as C_Mapping, Sequence as C_Sequence
from typing import Any, Type, Literal, Union, cast
from typing import Any, Type, Union
from enum import Enum

from dlt.common import pendulum, json, Decimal, Wei
from dlt.common.json import custom_pua_remove, json
from dlt.common.json._simplejson import custom_encode as json_custom_encode
from dlt.common.arithmetics import InvalidOperation
from dlt.common.wei import Wei
from dlt.common.arithmetics import InvalidOperation, Decimal
from dlt.common.data_types.typing import TDataType
from dlt.common.time import (
ensure_pendulum_datetime,
Expand Down
6 changes: 6 additions & 0 deletions dlt/common/data_writers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
DataWriterMetrics,
TDataItemFormat,
FileWriterSpec,
resolve_best_writer_spec,
get_best_writer_spec,
is_native_writer,
)
from dlt.common.data_writers.buffered import BufferedDataWriter, new_file_id
from dlt.common.data_writers.escape import (
Expand All @@ -14,6 +17,9 @@
__all__ = [
"DataWriter",
"FileWriterSpec",
"resolve_best_writer_spec",
"get_best_writer_spec",
"is_native_writer",
"DataWriterMetrics",
"TDataItemFormat",
"BufferedDataWriter",
Expand Down
4 changes: 1 addition & 3 deletions dlt/common/data_writers/buffered.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,7 @@ def __init__(
self.writer_spec = writer_spec
if self.writer_spec.requires_destination_capabilities and not _caps:
raise DestinationCapabilitiesRequired(self.writer_spec.file_format)
self.writer_cls = DataWriter.class_factory(
writer_spec.file_format, writer_spec.data_item_format
)
self.writer_cls = DataWriter.writer_class_from_spec(writer_spec)
self._supports_schema_changes = self.writer_spec.supports_schema_changes
self._caps = _caps
# validate if template has correct placeholders
Expand Down
24 changes: 24 additions & 0 deletions dlt/common/data_writers/escape.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from datetime import date, datetime, time # noqa: I251

from dlt.common.json import json
from dlt.common.pendulum import pendulum
from dlt.common.time import reduce_pendulum_datetime_precision

# use regex to escape characters in single pass
SQL_ESCAPE_DICT = {"'": "''", "\\": "\\\\", "\n": "\\n", "\r": "\\r"}
Expand Down Expand Up @@ -152,3 +154,25 @@ def escape_databricks_literal(v: Any) -> Any:
return "NULL"

return str(v)


def format_datetime_literal(v: pendulum.DateTime, precision: int = 6, no_tz: bool = False) -> str:
    """Converts `v` to a single-quoted ISO string, optionally without timezone spec (in UTC)
    and with the fractional seconds limited by `precision`.

    Args:
        v: Datetime to format.
        precision: Maximum number of fractional-second digits to emit. It is mapped to the
            finest `isoformat` timespec that does not exceed it: below 3 -> "seconds",
            below 6 -> "milliseconds", otherwise "microseconds".
        no_tz: When True, `v` is converted to UTC and the timezone info is dropped before
            formatting, so the resulting literal carries no offset.

    Returns:
        The datetime in ISO format, with a space between date and time, wrapped in single quotes.
    """
    if no_tz:
        v = v.in_timezone(tz="UTC").replace(tzinfo=None)
    # NOTE(review): helper comes from dlt.common.time; presumably it truncates the sub-second
    # part of `v` to `precision` digits - confirm against its implementation
    v = reduce_pendulum_datetime_precision(v, precision)
    # yet another precision translation
    # the tighter threshold must be tested first: every precision below 3 is also below 6,
    # so the reverse order makes the "seconds" branch unreachable
    timespec: str
    if precision < 3:
        timespec = "seconds"
    elif precision < 6:
        timespec = "milliseconds"
    else:
        timespec = "microseconds"
    return "'" + v.isoformat(sep=" ", timespec=timespec) + "'"


def format_bigquery_datetime_literal(
    v: pendulum.DateTime, precision: int = 6, no_tz: bool = False
) -> str:
    """Formats `v` as a BigQuery timestamp literal.

    Delegates to `format_datetime_literal` with the same `precision` and `no_tz` arguments
    and puts the `TIMESTAMP` keyword that BigQuery requires in front of the quoted value.
    """
    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#timestamp_literals
    quoted_literal = format_datetime_literal(v, precision, no_tz)
    return f"TIMESTAMP {quoted_literal}"
Loading

0 comments on commit efaedc2

Please sign in to comment.