Skip to content

Commit

Permalink
DEPS: bump pyarrow version to 0.17.0 pandas-dev#38870 (pandas-dev#41476)
Browse files: browse the repository at this point in the history
  • Loading branch information
fangchenli authored May 17, 2021
1 parent b9195cd commit 9ae8f1d
Show file tree
Hide file tree
Showing 19 changed files with 46 additions and 65 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/database.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ jobs:
- uses: conda-incubator/setup-miniconda@v2
with:
activate-environment: pandas-dev
channel-priority: strict
channel-priority: flexible
environment-file: ${{ matrix.ENV_FILE }}
use-only-tar-bz2: true

Expand Down
3 changes: 2 additions & 1 deletion ci/deps/actions-37-db-min.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ dependencies:
- openpyxl
- pandas-gbq
- google-cloud-bigquery>=1.27.2 # GH 36436
- pyarrow=0.17 # GH 38803
- protobuf>=3.12.4
- pyarrow=0.17.1 # GH 38803
- pytables>=3.5.1
- scipy
- xarray=0.12.3
Expand Down
2 changes: 1 addition & 1 deletion ci/deps/actions-37-db.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ dependencies:
- pandas-gbq
- google-cloud-bigquery>=1.27.2 # GH 36436
- psycopg2
- pyarrow>=0.15.0
- pyarrow>=0.17.0
- pymysql
- pytables
- python-snappy
Expand Down
2 changes: 1 addition & 1 deletion ci/deps/actions-37-minimum_versions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ dependencies:
- pytables=3.5.1
- python-dateutil=2.7.3
- pytz=2017.3
- pyarrow=0.15
- pyarrow=0.17.0
- scipy=1.2
- xlrd=1.2.0
- xlsxwriter=1.0.2
Expand Down
2 changes: 1 addition & 1 deletion ci/deps/actions-37.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ dependencies:
- numpy=1.19
- python-dateutil
- nomkl
- pyarrow=0.15.1
- pyarrow
- pytz
- s3fs>=0.4.0
- moto>=1.3.14
Expand Down
3 changes: 2 additions & 1 deletion ci/deps/azure-macos-37.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
name: pandas-dev
channels:
- defaults
- conda-forge
dependencies:
- python=3.7.*

Expand All @@ -21,7 +22,7 @@ dependencies:
- numexpr
- numpy=1.17.3
- openpyxl
- pyarrow=0.15.1
- pyarrow=0.17.0
- pytables
- python-dateutil==2.7.3
- pytz
Expand Down
2 changes: 1 addition & 1 deletion ci/deps/azure-windows-37.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ dependencies:
- numexpr
- numpy=1.17.*
- openpyxl
- pyarrow=0.15
- pyarrow=0.17.0
- pytables
- python-dateutil
- pytz
Expand Down
2 changes: 1 addition & 1 deletion ci/deps/azure-windows-38.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ dependencies:
- numpy=1.18.*
- openpyxl
- jinja2
- pyarrow>=0.15.0
- pyarrow>=0.17.0
- pytables
- python-dateutil
- pytz
Expand Down
2 changes: 1 addition & 1 deletion doc/source/getting_started/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ PyTables 3.5.1 HDF5-based reading / writing
blosc 1.17.0 Compression for HDF5
zlib Compression for HDF5
fastparquet 0.4.0 Parquet reading / writing
pyarrow 0.15.0 Parquet, ORC, and feather reading / writing
pyarrow 0.17.0 Parquet, ORC, and feather reading / writing
pyreadstat SPSS files (.sav) reading
========================= ================== =============================================================

Expand Down
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -579,7 +579,7 @@ Optional libraries below the lowest tested version may still work, but are not c
+-----------------+-----------------+---------+
| openpyxl | 3.0.0 | X |
+-----------------+-----------------+---------+
| pyarrow | 0.15.0 | |
| pyarrow | 0.17.0 | X |
+-----------------+-----------------+---------+
| pymysql | 0.8.1 | X |
+-----------------+-----------------+---------+
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ dependencies:
- odfpy

- fastparquet>=0.3.2 # pandas.read_parquet, DataFrame.to_parquet
- pyarrow>=0.15.0 # pandas.read_parquet, DataFrame.to_parquet, pandas.read_feather, DataFrame.to_feather
- pyarrow>=0.17.0 # pandas.read_parquet, DataFrame.to_parquet, pandas.read_feather, DataFrame.to_feather
- python-snappy # required by pyarrow

- pyqt>=5.9.2 # pandas.read_clipboard
Expand Down
2 changes: 1 addition & 1 deletion pandas/compat/_optional.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
"odfpy": "1.3.0",
"openpyxl": "3.0.0",
"pandas_gbq": "0.12.0",
"pyarrow": "0.15.0",
"pyarrow": "0.17.0",
"pytest": "5.0.1",
"pyxlsb": "1.0.6",
"s3fs": "0.4.0",
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arrays/interval/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def test_repr():
# Arrow interaction


pyarrow_skip = td.skip_if_no("pyarrow", min_version="0.16.0")
pyarrow_skip = td.skip_if_no("pyarrow")


@pyarrow_skip
Expand Down
16 changes: 6 additions & 10 deletions pandas/tests/arrays/masked/test_arrow_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import pandas as pd
import pandas._testing as tm

pa = pytest.importorskip("pyarrow", minversion="0.15.0")
pa = pytest.importorskip("pyarrow", minversion="0.17.0")

from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask

Expand All @@ -21,8 +21,6 @@ def data(request):


def test_arrow_array(data):
# protocol added in 0.15.0

arr = pa.array(data)
expected = pa.array(
data.to_numpy(object, na_value=None),
Expand All @@ -31,10 +29,8 @@ def test_arrow_array(data):
assert arr.equals(expected)


@td.skip_if_no("pyarrow", min_version="0.16.0")
@td.skip_if_no("pyarrow")
def test_arrow_roundtrip(data):
# roundtrip possible from arrow 0.16.0

df = pd.DataFrame({"a": data})
table = pa.table(df)
assert table.field("a").type == str(data.dtype.numpy_dtype)
Expand All @@ -43,7 +39,7 @@ def test_arrow_roundtrip(data):
tm.assert_frame_equal(result, df)


@td.skip_if_no("pyarrow", min_version="0.16.0")
@td.skip_if_no("pyarrow")
def test_arrow_load_from_zero_chunks(data):
# GH-41040

Expand All @@ -58,7 +54,7 @@ def test_arrow_load_from_zero_chunks(data):
tm.assert_frame_equal(result, df)


@td.skip_if_no("pyarrow", min_version="0.16.0")
@td.skip_if_no("pyarrow")
def test_arrow_from_arrow_uint():
# https://github.com/pandas-dev/pandas/issues/31896
# possible mismatch in types
Expand All @@ -70,7 +66,7 @@ def test_arrow_from_arrow_uint():
tm.assert_extension_array_equal(result, expected)


@td.skip_if_no("pyarrow", min_version="0.16.0")
@td.skip_if_no("pyarrow")
def test_arrow_sliced(data):
# https://github.com/pandas-dev/pandas/issues/38525

Expand Down Expand Up @@ -165,7 +161,7 @@ def test_pyarrow_array_to_numpy_and_mask(np_dtype_to_arrays):
tm.assert_numpy_array_equal(mask, mask_expected_empty)


@td.skip_if_no("pyarrow", min_version="0.16.0")
@td.skip_if_no("pyarrow")
def test_from_arrow_type_error(request, data):
# ensure that __from_arrow__ returns a TypeError when getting a wrong
# array type
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arrays/period/test_arrow_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
period_array,
)

pyarrow_skip = pyarrow_skip = td.skip_if_no("pyarrow", min_version="0.16.0")
pyarrow_skip = td.skip_if_no("pyarrow", min_version="0.17.0")


@pyarrow_skip
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/arrays/string_/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,7 +437,7 @@ def test_fillna_args(dtype, request):
arr.fillna(value=1)


@td.skip_if_no("pyarrow", min_version="0.15.0")
@td.skip_if_no("pyarrow")
def test_arrow_array(dtype):
# protocol added in 0.15.0
import pyarrow as pa
Expand All @@ -451,7 +451,7 @@ def test_arrow_array(dtype):
assert arr.equals(expected)


@td.skip_if_no("pyarrow", min_version="0.16.0")
@td.skip_if_no("pyarrow")
def test_arrow_roundtrip(dtype, dtype_object):
# roundtrip possible from arrow 1.0.0
import pyarrow as pa
Expand All @@ -467,7 +467,7 @@ def test_arrow_roundtrip(dtype, dtype_object):
assert result.loc[2, "a"] is pd.NA


@td.skip_if_no("pyarrow", min_version="0.16.0")
@td.skip_if_no("pyarrow")
def test_arrow_load_from_zero_chunks(dtype, dtype_object):
# GH-41040
import pyarrow as pa
Expand Down
13 changes: 5 additions & 8 deletions pandas/tests/io/test_feather.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,12 @@

import pandas as pd
import pandas._testing as tm
from pandas.util.version import Version

from pandas.io.feather_format import read_feather, to_feather # isort:skip

pyarrow = pytest.importorskip("pyarrow")


pyarrow_version = Version(pyarrow.__version__)
filter_sparse = pytest.mark.filterwarnings("ignore:The Sparse")


Expand Down Expand Up @@ -89,12 +87,11 @@ def test_basic(self):
),
}
)
if pyarrow_version >= Version("0.17.0"):
df["periods"] = pd.period_range("2013", freq="M", periods=3)
df["timedeltas"] = pd.timedelta_range("1 day", periods=3)
# TODO temporary disable due to regression in pyarrow 0.17.1
# https://github.com/pandas-dev/pandas/issues/34255
# df["intervals"] = pd.interval_range(0, 3, 3)
df["periods"] = pd.period_range("2013", freq="M", periods=3)
df["timedeltas"] = pd.timedelta_range("1 day", periods=3)
# TODO temporary disable due to regression in pyarrow 0.17.1
# https://github.com/pandas-dev/pandas/issues/34255
# df["intervals"] = pd.interval_range(0, 3, 3)

assert df.dttz.dtype.tz.zone == "US/Eastern"
self.check_round_trip(df)
Expand Down
44 changes: 15 additions & 29 deletions pandas/tests/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@
PY38,
is_platform_windows,
)
from pandas.compat.pyarrow import (
pa_version_under1p0,
pa_version_under2p0,
)
import pandas.util._test_decorators as td

import pandas as pd
Expand Down Expand Up @@ -653,8 +657,6 @@ def test_categorical(self, pa):
)
def test_s3_roundtrip_explicit_fs(self, df_compat, s3_resource, pa, s3so):
s3fs = pytest.importorskip("s3fs")
if Version(pyarrow.__version__) <= Version("0.17.0"):
pytest.skip()
s3 = s3fs.S3FileSystem(**s3so)
kw = {"filesystem": s3}
check_round_trip(
Expand All @@ -666,8 +668,6 @@ def test_s3_roundtrip_explicit_fs(self, df_compat, s3_resource, pa, s3so):
)

def test_s3_roundtrip(self, df_compat, s3_resource, pa, s3so):
if Version(pyarrow.__version__) <= Version("0.17.0"):
pytest.skip()
# GH #19134
s3so = {"storage_options": s3so}
check_round_trip(
Expand Down Expand Up @@ -698,14 +698,12 @@ def test_s3_roundtrip_for_dir(
# These are added to back of dataframe on read. In new API category dtype is
# only used if partition field is string, but this changed again to use
# category dtype for all types (not only strings) in pyarrow 2.0.0
pa10 = (Version(pyarrow.__version__) >= Version("1.0.0")) and (
Version(pyarrow.__version__) < Version("2.0.0")
)
if partition_col:
if pa10:
partition_col_type = "int32"
else:
partition_col_type = "category"
partition_col_type = (
"int32"
if (not pa_version_under1p0) and pa_version_under2p0
else "category"
)

expected_df[partition_col] = expected_df[partition_col].astype(
partition_col_type
Expand Down Expand Up @@ -795,7 +793,7 @@ def test_write_with_schema(self, pa):
out_df = df.astype(bool)
check_round_trip(df, pa, write_kwargs={"schema": schema}, expected=out_df)

@td.skip_if_no("pyarrow", min_version="0.15.0")
@td.skip_if_no("pyarrow")
def test_additional_extension_arrays(self, pa):
# test additional ExtensionArrays that are supported through the
# __arrow_array__ protocol
Expand All @@ -806,22 +804,10 @@ def test_additional_extension_arrays(self, pa):
"c": pd.Series(["a", None, "c"], dtype="string"),
}
)
if Version(pyarrow.__version__) >= Version("0.16.0"):
expected = df
else:
# de-serialized as plain int / object
expected = df.assign(
a=df.a.astype("int64"), b=df.b.astype("int64"), c=df.c.astype("object")
)
check_round_trip(df, pa, expected=expected)
check_round_trip(df, pa)

df = pd.DataFrame({"a": pd.Series([1, 2, 3, None], dtype="Int64")})
if Version(pyarrow.__version__) >= Version("0.16.0"):
expected = df
else:
# if missing values in integer, currently de-serialized as float
expected = df.assign(a=df.a.astype("float64"))
check_round_trip(df, pa, expected=expected)
check_round_trip(df, pa)

@td.skip_if_no("pyarrow", min_version="1.0.0")
def test_pyarrow_backed_string_array(self, pa):
Expand All @@ -831,7 +817,7 @@ def test_pyarrow_backed_string_array(self, pa):
df = pd.DataFrame({"a": pd.Series(["a", None, "c"], dtype="arrow_string")})
check_round_trip(df, pa, expected=df)

@td.skip_if_no("pyarrow", min_version="0.16.0")
@td.skip_if_no("pyarrow")
def test_additional_extension_types(self, pa):
# test additional ExtensionArrays that are supported through the
# __arrow_array__ protocol + by defining a custom ExtensionType
Expand All @@ -844,7 +830,7 @@ def test_additional_extension_types(self, pa):
)
check_round_trip(df, pa)

@td.skip_if_no("pyarrow", min_version="0.16.0")
@td.skip_if_no("pyarrow")
def test_use_nullable_dtypes(self, pa):
import pyarrow.parquet as pq

Expand Down Expand Up @@ -880,7 +866,7 @@ def test_timestamp_nanoseconds(self, pa):
check_round_trip(df, pa, write_kwargs={"version": "2.0"})

def test_timezone_aware_index(self, pa, timezone_aware_date_list):
if Version(pyarrow.__version__) >= Version("2.0.0"):
if not pa_version_under2p0:
# temporarily skip this test until it is properly resolved
# https://github.com/pandas-dev/pandas/issues/37286
pytest.skip()
Expand Down
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ xlsxwriter
xlwt
odfpy
fastparquet>=0.3.2
pyarrow>=0.15.0
pyarrow>=0.17.0
python-snappy
pyqt5>=5.9.2
tables>=3.5.1
Expand Down

0 comments on commit 9ae8f1d

Please sign in to comment.