From 9805875f6e01a6b1c5bd2ef8c554cde3bd3bf9a2 Mon Sep 17 00:00:00 2001 From: Fawaz Ahmed Date: Sat, 20 Jul 2024 07:56:36 +0530 Subject: [PATCH 01/11] Fix https://github.com/pandas-dev/pandas/issues/20608 --- pandas/io/json/_json.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index b29ead1d14b1d..5a8f430486938 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -1167,6 +1167,7 @@ def _try_convert_data( """ Try to parse a Series into a column by inferring dtype. """ + backup_data = data # don't try to coerce, unless a force conversion if use_dtypes: if not self.dtype: @@ -1221,7 +1222,7 @@ def _try_convert_data( if len(data) and data.dtype in ("float", "object"): # coerce ints if we can try: - new_data = data.astype("int64") + new_data = backup_data.astype("int64") if (new_data == data).all(): data = new_data converted = True From fb63fd29a9234afbb0a8812af908fafbd580e873 Mon Sep 17 00:00:00 2001 From: Fawaz Ahmed Date: Thu, 1 Aug 2024 23:00:46 +0530 Subject: [PATCH 02/11] add test --- pandas/tests/io/json/test_large_number.py | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 pandas/tests/io/json/test_large_number.py diff --git a/pandas/tests/io/json/test_large_number.py b/pandas/tests/io/json/test_large_number.py new file mode 100644 index 0000000000000..8a13a6cb45d09 --- /dev/null +++ b/pandas/tests/io/json/test_large_number.py @@ -0,0 +1,6 @@ +from io import StringIO +import pytest +import pandas as pd + +def test_large_number(): + assert pd.read_json(StringIO('["9999999999999999"]'),orient="values",typ="series",convert_dates=False)[0] == 9999999999999999 From ed7e5f98f7e69267fc543b6638dff4b57ea431d2 Mon Sep 17 00:00:00 2001 From: Fawaz Ahmed Date: Thu, 1 Aug 2024 23:01:47 +0530 Subject: [PATCH 03/11] add test --- pandas/tests/io/json/test_large_number.py | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 pandas/tests/io/json/test_large_number.py diff 
--git a/pandas/tests/io/json/test_large_number.py b/pandas/tests/io/json/test_large_number.py new file mode 100644 index 0000000000000..8a13a6cb45d09 --- /dev/null +++ b/pandas/tests/io/json/test_large_number.py @@ -0,0 +1,6 @@ +from io import StringIO +import pytest +import pandas as pd + +def test_large_number(): + assert pd.read_json(StringIO('["9999999999999999"]'),orient="values",typ="series",convert_dates=False)[0] == 9999999999999999 From 0f9c6467307d5d85ecb9ac04ee90fa63ee640ee6 Mon Sep 17 00:00:00 2001 From: Fawaz Ahmed Date: Fri, 2 Aug 2024 03:57:38 +0530 Subject: [PATCH 04/11] Delete pandas/tests/io/json/test_large_number.py --- pandas/tests/io/json/test_large_number.py | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 pandas/tests/io/json/test_large_number.py diff --git a/pandas/tests/io/json/test_large_number.py b/pandas/tests/io/json/test_large_number.py deleted file mode 100644 index 8a13a6cb45d09..0000000000000 --- a/pandas/tests/io/json/test_large_number.py +++ /dev/null @@ -1,6 +0,0 @@ -from io import StringIO -import pytest -import pandas as pd - -def test_large_number(): - assert pd.read_json(StringIO('["9999999999999999"]'),orient="values",typ="series",convert_dates=False)[0] == 9999999999999999 From f1b04a5c88563531a2e1ba7b6992e74a570b4af6 Mon Sep 17 00:00:00 2001 From: Fawaz Ahmed Date: Fri, 2 Aug 2024 03:58:09 +0530 Subject: [PATCH 05/11] add test --- pandas/tests/io/json/test_pandas.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index d281729e9704c..c77661cde5a6a 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -2303,3 +2303,6 @@ def test_read_json_lines_rangeindex(): result = read_json(StringIO(data), lines=True).index expected = RangeIndex(2) tm.assert_index_equal(result, expected, exact=True) + +def test_large_number(): + assert 
pd.read_json(StringIO('["9999999999999999"]'),orient="values",typ="series",convert_dates=False)[0] == 9999999999999999 From 212c3b23f7263d0c827c0e9c013db92784ce00d2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 4 Sep 2024 20:28:57 +0000 Subject: [PATCH 06/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pandas/tests/io/json/test_pandas.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index f17c1f01476b5..17da46035f9a8 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -2287,5 +2287,14 @@ def test_read_json_lines_rangeindex(): expected = RangeIndex(2) tm.assert_index_equal(result, expected, exact=True) + def test_large_number(): - assert pd.read_json(StringIO('["9999999999999999"]'),orient="values",typ="series",convert_dates=False)[0] == 9999999999999999 + assert ( + pd.read_json( + StringIO('["9999999999999999"]'), + orient="values", + typ="series", + convert_dates=False, + )[0] + == 9999999999999999 + ) From 410ee0ebfbb12afca110e378b9ab05040dd2fb91 Mon Sep 17 00:00:00 2001 From: Fawaz Ahmed Date: Thu, 19 Sep 2024 03:32:35 +0530 Subject: [PATCH 07/11] Revert "add test" This reverts commit ed7e5f98f7e69267fc543b6638dff4b57ea431d2. 
--- pandas/tests/io/json/test_large_number.py | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 pandas/tests/io/json/test_large_number.py diff --git a/pandas/tests/io/json/test_large_number.py b/pandas/tests/io/json/test_large_number.py deleted file mode 100644 index 8a13a6cb45d09..0000000000000 --- a/pandas/tests/io/json/test_large_number.py +++ /dev/null @@ -1,6 +0,0 @@ -from io import StringIO -import pytest -import pandas as pd - -def test_large_number(): - assert pd.read_json(StringIO('["9999999999999999"]'),orient="values",typ="series",convert_dates=False)[0] == 9999999999999999 From c87f75869aac3393b092b54908b0e1933d401a82 Mon Sep 17 00:00:00 2001 From: Fawaz Ahmed Date: Thu, 19 Sep 2024 03:34:53 +0530 Subject: [PATCH 08/11] use a better variable name --- pandas/io/json/_json.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 33094f550e970..e9c9f5ba225a5 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -1168,7 +1168,7 @@ def _try_convert_data( """ Try to parse a Series into a column by inferring dtype. 
""" - backup_data = data + org_data = data # don't try to coerce, unless a force conversion if use_dtypes: if not self.dtype: @@ -1223,7 +1223,7 @@ def _try_convert_data( if len(data) and data.dtype in ("float", "object"): # coerce ints if we can try: - new_data = backup_data.astype("int64") + new_data = org_data.astype("int64") if (new_data == data).all(): data = new_data converted = True From 0f7f982ea0b76fb90635abbb501e0b6d50db8830 Mon Sep 17 00:00:00 2001 From: Fawaz Ahmed Date: Thu, 19 Sep 2024 06:21:48 +0530 Subject: [PATCH 09/11] fix pre-commit.ci issues --- pandas/tests/io/json/test_pandas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 0fb134245dcaa..595c095b8febc 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -2290,7 +2290,7 @@ def test_read_json_lines_rangeindex(): def test_large_number(): assert ( - pd.read_json( + read_json( StringIO('["9999999999999999"]'), orient="values", typ="series", From b9fa7147bf62383238de8f2ff619b9f01940e623 Mon Sep 17 00:00:00 2001 From: Fawaz Ahmed Date: Mon, 23 Sep 2024 06:39:15 +0530 Subject: [PATCH 10/11] use tm.assert_series_equal --- pandas/tests/io/json/test_pandas.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 595c095b8febc..d3328d1dfcaef 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -2289,12 +2289,12 @@ def test_read_json_lines_rangeindex(): def test_large_number(): - assert ( - read_json( - StringIO('["9999999999999999"]'), - orient="values", - typ="series", - convert_dates=False, - )[0] - == 9999999999999999 + # GH#20608 + result = read_json( + StringIO('["9999999999999999"]'), + orient="values", + typ="series", + convert_dates=False, ) + expected = Series([9999999999999999]) + 
tm.assert_series_equal(result, expected) From 4b159d3904931e0049eab7688e822d109062507d Mon Sep 17 00:00:00 2001 From: Fawaz Ahmed Date: Tue, 24 Sep 2024 01:15:51 +0530 Subject: [PATCH 11/11] Add whats new --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index c2a56afbc580e..3b5183c43bcd0 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -630,6 +630,7 @@ I/O - Bug in :meth:`read_csv` where the order of the ``na_values`` makes an inconsistency when ``na_values`` is a list non-string values. (:issue:`59303`) - Bug in :meth:`read_excel` raising ``ValueError`` when passing array of boolean values when ``dtype="boolean"``. (:issue:`58159`) - Bug in :meth:`read_json` not validating the ``typ`` argument to not be exactly ``"frame"`` or ``"series"`` (:issue:`59124`) +- Bug in :meth:`read_json` where extreme value integers in string format (e.g. ``"9999999999999999"``) were incorrectly parsed as a different integer (:issue:`20608`) - Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`) - Bug in :meth:`read_stata` where extreme value integers were incorrectly interpreted as missing for format versions 111 and prior (:issue:`58130`) - Bug in :meth:`read_stata` where the missing code for double was not recognised for format versions 105 and prior (:issue:`58149`)