Skip to content

Commit

Permalink
Integrate with dask>=2023.7.1, enable fugue pytest support, remove py…
Browse files Browse the repository at this point in the history
…thon 3.7 support, change pandas dtype system (#502)
  • Loading branch information
goodwanghan authored Aug 16, 2023
1 parent dbeeb1b commit ae269ee
Show file tree
Hide file tree
Showing 65 changed files with 707 additions and 731 deletions.
54 changes: 32 additions & 22 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -1,32 +1,42 @@
{
"name": "Fugue Development Environment",
"image": "fugueproject/devenv:0.7.7",
"settings": {
"terminal.integrated.shell.linux": "/bin/bash",
"python.pythonPath": "/usr/local/bin/python",
"python.linting.enabled": true,
"python.linting.pylintEnabled": true,
"python.formatting.autopep8Path": "/usr/local/py-utils/bin/autopep8",
"python.formatting.blackPath": "/usr/local/py-utils/bin/black",
"python.formatting.yapfPath": "/usr/local/py-utils/bin/yapf",
"python.linting.banditPath": "/usr/local/py-utils/bin/bandit",
"python.linting.flake8Path": "/usr/local/py-utils/bin/flake8",
"python.linting.mypyPath": "/usr/local/py-utils/bin/mypy",
"python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle",
"python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle",
"python.linting.pylintPath": "/usr/local/py-utils/bin/pylint"
"_image": "fugueproject/devenv:0.7.7",
"image": "mcr.microsoft.com/vscode/devcontainers/python:3.10",
"customizations": {
"vscode": {
"settings": {
"terminal.integrated.shell.linux": "/bin/bash",
"python.pythonPath": "/usr/local/bin/python",
"python.defaultInterpreterPath": "/usr/local/bin/python",
"isort.interpreter": [
"/usr/local/bin/python"
],
"flake8.interpreter": [
"/usr/local/bin/python"
],
"pylint.interpreter": [
"/usr/local/bin/python"
]
},
"extensions": [
"ms-python.python",
"ms-python.isort",
"ms-python.flake8",
"ms-python.pylint",
"ms-python.mypy",
"GitHub.copilot",
"njpwerner.autodocstring"
]
}
},
"extensions": [
"ms-python.python",
"ms-python.isort",
"GitHub.copilot",
"njpwerner.autodocstring"
],
"forwardPorts": [
8888
],
"postCreateCommand": "make devenv",
"features": {
"ghcr.io/devcontainers/features/docker-in-docker:2": {}
"ghcr.io/devcontainers/features/docker-in-docker:2": {},
"ghcr.io/devcontainers/features/java:1": {
"version": "11"
}
}
}
8 changes: 8 additions & 0 deletions .github/ISSUE_TEMPLATE/deprecation.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
name: Deprecation
about: Deprecate certain features
title: "[DEPRECATION]"
labels: ''
assignees: ''

---
5 changes: 1 addition & 4 deletions .github/workflows/test_core.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.7, "3.10"]
python-version: ["3.10"]

steps:
- uses: actions/checkout@v2
Expand All @@ -35,9 +35,6 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Fix setuptools_scm
run: pip install "setuptools_scm<7"
- name: Save time
if: matrix.python-version == 3.7
run: pip install "pandas<1.3.0"
- name: Install dependencies
run: make devenv
- name: Install pandas 2
Expand Down
55 changes: 55 additions & 0 deletions .github/workflows/test_dask.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Dask Tests

on:
push:
branches: [ master ]
paths-ignore:
- 'docs/**'
- '**.md'
pull_request:
branches: [ master ]
paths-ignore:
- 'docs/**'
- '**.md'

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
test_dask_lower_bound:
name: Dask 2023.5.0
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2
- name: Set up Python 3.8
uses: actions/setup-python@v1
with:
python-version: 3.8
- name: Install dependencies
run: make devenv
- name: Setup Dask
run: pip install pyarrow==7.0.0 pandas==2.0.2 dask[dataframe,distributed]==2023.5.0
- name: Test
run: make testdask

test_dask_latest:
name: Dask Latest
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2
- name: Set up Python 3.10
uses: actions/setup-python@v1
with:
python-version: "3.10"
- name: Install dependencies
run: make devenv
- name: Setup Dask
run: pip install -U dask[dataframe,distributed] pyarrow pandas
- name: Test
run: make testdask
3 changes: 0 additions & 3 deletions .github/workflows/test_no_sql.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,6 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Fix setuptools_scm
run: pip install "setuptools_scm<7"
- name: Save time
if: matrix.python-version == 3.7
run: pip install "pandas<1.3.0"
- name: Install dependencies
run: make devenv
- name: Install pandas 2
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test_ray.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
- name: Install dependencies
run: make devenv
- name: Setup Ray
run: pip install ray[data]==2.1.0 pyarrow==6.0.1 pandas==1.5.3
run: pip install ray[data]==2.1.0 pyarrow==6.0.1 pandas==1.5.3 'pydantic<2'
- name: Test
run: make testray

Expand Down
2 changes: 1 addition & 1 deletion .pylintrc
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
[MESSAGES CONTROL]
disable = C0103,C0114,C0115,C0116,C0122,C0200,C0201,C0302,C0411,C0415,E0401,E0712,E1130,E5110,R0201,R0205,R0801,R0902,R0903,R0904,R0911,R0912,R0913,R0914,R0915,R1705,R1710,R1714,R1718,R1720,R1724,W0102,W0107,W0108,W0201,W0212,W0221,W0223,W0237,W0511,W0613,W0622,W0631,W0640,W0703,W0707,W1116
disable = C0103,C0114,C0115,C0116,C0122,C0200,C0201,C0302,C0411,C0415,E0401,E0712,E1130,E1136,E5110,R0201,R0205,R0801,R0902,R0903,R0904,R0911,R0912,R0913,R0914,R0915,R1705,R1710,R1714,R1718,R1720,R1724,W0102,W0107,W0108,W0201,W0212,W0221,W0223,W0237,W0511,W0613,W0622,W0631,W0640,W0703,W0707,W1116
# TODO: R0205: inherits from object, can be safely removed
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
[![PyPI license](https://img.shields.io/pypi/l/fugue.svg)](https://pypi.python.org/pypi/fugue/)
[![codecov](https://codecov.io/gh/fugue-project/fugue/branch/master/graph/badge.svg?token=ZO9YD5N3IA)](https://codecov.io/gh/fugue-project/fugue)
[![Codacy Badge](https://app.codacy.com/project/badge/Grade/4fa5f2f53e6f48aaa1218a89f4808b91)](https://www.codacy.com/gh/fugue-project/fugue/dashboard?utm_source=github.com&utm_medium=referral&utm_content=fugue-project/fugue&utm_campaign=Badge_Grade)
[![Downloads](https://pepy.tech/badge/fugue)](https://pepy.tech/project/fugue)
[![Downloads](https://static.pepy.tech/badge/fugue)](https://pepy.tech/project/fugue)

| Tutorials | API Documentation | Chat with us on slack! |
| --------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------ |
Expand Down
10 changes: 10 additions & 0 deletions RELEASE.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
# Release Notes

## 0.8.7

- [506](https://github.com/fugue-project/fugue/issues/506) Adopt pandas `ExtensionDtype`
- [504](https://github.com/fugue-project/fugue/issues/504) Create Fugue pytest fixtures
- [503](https://github.com/fugue-project/fugue/issues/503) Deprecate python 3.7 support
- [501](https://github.com/fugue-project/fugue/issues/501) Simplify zip/comap, remove join from the implementation
- [500](https://github.com/fugue-project/fugue/issues/500) Implement all partitioning strategies for Dask
- [495](https://github.com/fugue-project/fugue/issues/495) Resolve segfault on DuckDB 0.8.1
- [494](https://github.com/fugue-project/fugue/issues/494) Remove the version cap of Dask

## 0.8.6

- [497](https://github.com/fugue-project/fugue/issues/497) Make LocalExecutionEngine respect partition numbers
Expand Down
13 changes: 10 additions & 3 deletions fugue/_utils/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from triad.collections.fs import FileSystem
from triad.collections.schema import Schema
from triad.utils.assertion import assert_or_throw
from triad.utils.pandas_like import PD_UTILS

from fugue.dataframe import LocalBoundedDataFrame, LocalDataFrame, PandasDataFrame

Expand Down Expand Up @@ -154,8 +155,15 @@ def _get_single_files(


def _save_parquet(df: LocalDataFrame, p: FileParser, **kwargs: Any) -> None:
df.as_pandas().to_parquet(
p.uri, **{"engine": "pyarrow", "schema": df.schema.pa_schema, **kwargs}
PD_UTILS.to_parquet_friendly(
df.as_pandas(), partition_cols=kwargs.get("partition_cols", [])
).to_parquet(
p.uri,
**{
"engine": "pyarrow",
"schema": df.schema.pa_schema,
**kwargs,
},
)


Expand Down Expand Up @@ -194,7 +202,6 @@ def load_dir() -> pd.DataFrame:
except IsADirectoryError:
return load_dir()
except pd.errors.ParserError: # pragma: no cover
# for python < 3.7
return load_dir()
except PermissionError: # pragma: no cover
# for windows
Expand Down
2 changes: 1 addition & 1 deletion fugue/dataframe/array_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def rename(self, columns: Dict[str, str]) -> DataFrame:
return ArrayDataFrame(self.native, schema)

def alter_columns(self, columns: Any) -> DataFrame:
new_schema = self._get_altered_schema(columns)
new_schema = self.schema.alter(columns)
if new_schema == self.schema:
return self
temp = ArrayDataFrame(self.native, new_schema).as_array(type_safe=True)
Expand Down
Loading

0 comments on commit ae269ee

Please sign in to comment.