diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 3e9860d157e5f..9ab14e2a55662 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -181,7 +181,18 @@ def test_comparison_invalid(self, arg, arg2): with pytest.raises(TypeError, match=msg): x <= y - def test_timestamp_compare(self): + @pytest.mark.parametrize( + "left, right", + [ + ("gt", "lt"), + ("lt", "gt"), + ("ge", "le"), + ("le", "ge"), + ("eq", "eq"), + ("ne", "ne"), + ], + ) + def test_timestamp_compare(self, left, right): # make sure we can compare Timestamps on the right AND left hand side # GH#4982 df = DataFrame( @@ -194,40 +205,37 @@ def test_timestamp_compare(self): } ) df.loc[np.random.rand(len(df)) > 0.5, "dates2"] = pd.NaT - ops = {"gt": "lt", "lt": "gt", "ge": "le", "le": "ge", "eq": "eq", "ne": "ne"} - - for left, right in ops.items(): - left_f = getattr(operator, left) - right_f = getattr(operator, right) + left_f = getattr(operator, left) + right_f = getattr(operator, right) - # no nats - if left in ["eq", "ne"]: - expected = left_f(df, pd.Timestamp("20010109")) - result = right_f(pd.Timestamp("20010109"), df) - tm.assert_frame_equal(result, expected) - else: - msg = ( - "'(<|>)=?' not supported between " - "instances of 'numpy.ndarray' and 'Timestamp'" - ) - with pytest.raises(TypeError, match=msg): - left_f(df, pd.Timestamp("20010109")) - with pytest.raises(TypeError, match=msg): - right_f(pd.Timestamp("20010109"), df) - # nats - if left in ["eq", "ne"]: - expected = left_f(df, pd.Timestamp("nat")) - result = right_f(pd.Timestamp("nat"), df) - tm.assert_frame_equal(result, expected) - else: - msg = ( - "'(<|>)=?' not supported between " - "instances of 'numpy.ndarray' and 'NaTType'" - ) - with pytest.raises(TypeError, match=msg): - left_f(df, pd.Timestamp("nat")) - with pytest.raises(TypeError, match=msg): - right_f(pd.Timestamp("nat"), df) + # no nats + if left in ["eq", "ne"]: + expected = left_f(df, pd.Timestamp("20010109")) + result = right_f(pd.Timestamp("20010109"), df) + tm.assert_frame_equal(result, expected) + else: + msg = ( + "'(<|>)=?' not supported between " + "instances of 'numpy.ndarray' and 'Timestamp'" + ) + with pytest.raises(TypeError, match=msg): + left_f(df, pd.Timestamp("20010109")) + with pytest.raises(TypeError, match=msg): + right_f(pd.Timestamp("20010109"), df) + # nats + if left in ["eq", "ne"]: + expected = left_f(df, pd.Timestamp("nat")) + result = right_f(pd.Timestamp("nat"), df) + tm.assert_frame_equal(result, expected) + else: + msg = ( + "'(<|>)=?' not supported between " + "instances of 'numpy.ndarray' and 'NaTType'" + ) + with pytest.raises(TypeError, match=msg): + left_f(df, pd.Timestamp("nat")) + with pytest.raises(TypeError, match=msg): + right_f(pd.Timestamp("nat"), df) def test_mixed_comparison(self): # GH#13128, GH#22163 != datetime64 vs non-dt64 should be False, @@ -275,79 +283,81 @@ def test_df_string_comparison(self): class TestFrameFlexComparisons: # TODO: test_bool_flex_frame needs a better name - def test_bool_flex_frame(self): + @pytest.mark.parametrize("op", ["eq", "ne", "gt", "lt", "ge", "le"]) + def test_bool_flex_frame(self, op): data = np.random.randn(5, 3) other_data = np.random.randn(5, 3) df = DataFrame(data) other = DataFrame(other_data) ndim_5 = np.ones(df.shape + (1, 3)) - # Unaligned - def _check_unaligned_frame(meth, op, df, other): - part_o = other.loc[3:, 1:].copy() - rs = meth(part_o) - xp = op(df, part_o.reindex(index=df.index, columns=df.columns)) - tm.assert_frame_equal(rs, xp) - # DataFrame assert df.eq(df).values.all() assert not df.ne(df).values.any() - for op in ["eq", "ne", "gt", "lt", "ge", "le"]: - f = getattr(df, op) - o = getattr(operator, op) - # No NAs - tm.assert_frame_equal(f(other), o(df, other)) - _check_unaligned_frame(f, o, df, other) - # ndarray - tm.assert_frame_equal(f(other.values), o(df, other.values)) - # scalar - tm.assert_frame_equal(f(0), o(df, 0)) - # NAs - msg = "Unable to coerce to Series/DataFrame" - tm.assert_frame_equal(f(np.nan), o(df, np.nan)) - with pytest.raises(ValueError, match=msg): - f(ndim_5) + f = getattr(df, op) + o = getattr(operator, op) + # No NAs + tm.assert_frame_equal(f(other), o(df, other)) + # Unaligned + part_o = other.loc[3:, 1:].copy() + rs = f(part_o) + xp = o(df, part_o.reindex(index=df.index, columns=df.columns)) + tm.assert_frame_equal(rs, xp) + # ndarray + tm.assert_frame_equal(f(other.values), o(df, other.values)) + # scalar + tm.assert_frame_equal(f(0), o(df, 0)) + # NAs + msg = "Unable to coerce to Series/DataFrame" + tm.assert_frame_equal(f(np.nan), o(df, np.nan)) + with pytest.raises(ValueError, match=msg): + f(ndim_5) + @pytest.mark.parametrize("box", [np.array, Series]) + def test_bool_flex_series(self, box): # Series - def _test_seq(df, idx_ser, col_ser): - idx_eq = df.eq(idx_ser, axis=0) - col_eq = df.eq(col_ser) - idx_ne = df.ne(idx_ser, axis=0) - col_ne = df.ne(col_ser) - tm.assert_frame_equal(col_eq, df == Series(col_ser)) - tm.assert_frame_equal(col_eq, -col_ne) - tm.assert_frame_equal(idx_eq, -idx_ne) - tm.assert_frame_equal(idx_eq, df.T.eq(idx_ser).T) - tm.assert_frame_equal(col_eq, df.eq(list(col_ser))) - tm.assert_frame_equal(idx_eq, df.eq(Series(idx_ser), axis=0)) - tm.assert_frame_equal(idx_eq, df.eq(list(idx_ser), axis=0)) - - idx_gt = df.gt(idx_ser, axis=0) - col_gt = df.gt(col_ser) - idx_le = df.le(idx_ser, axis=0) - col_le = df.le(col_ser) - - tm.assert_frame_equal(col_gt, df > Series(col_ser)) - tm.assert_frame_equal(col_gt, -col_le) - tm.assert_frame_equal(idx_gt, -idx_le) - tm.assert_frame_equal(idx_gt, df.T.gt(idx_ser).T) - - idx_ge = df.ge(idx_ser, axis=0) - col_ge = df.ge(col_ser) - idx_lt = df.lt(idx_ser, axis=0) - col_lt = df.lt(col_ser) - tm.assert_frame_equal(col_ge, df >= Series(col_ser)) - tm.assert_frame_equal(col_ge, -col_lt) - tm.assert_frame_equal(idx_ge, -idx_lt) - tm.assert_frame_equal(idx_ge, df.T.ge(idx_ser).T) + # list/tuple + data = np.random.randn(5, 3) + df = DataFrame(data) + idx_ser = box(np.random.randn(5)) + col_ser = box(np.random.randn(3)) + + idx_eq = df.eq(idx_ser, axis=0) + col_eq = df.eq(col_ser) + idx_ne = df.ne(idx_ser, axis=0) + col_ne = df.ne(col_ser) + tm.assert_frame_equal(col_eq, df == Series(col_ser)) + tm.assert_frame_equal(col_eq, -col_ne) + tm.assert_frame_equal(idx_eq, -idx_ne) + tm.assert_frame_equal(idx_eq, df.T.eq(idx_ser).T) + tm.assert_frame_equal(col_eq, df.eq(list(col_ser))) + tm.assert_frame_equal(idx_eq, df.eq(Series(idx_ser), axis=0)) + tm.assert_frame_equal(idx_eq, df.eq(list(idx_ser), axis=0)) + + idx_gt = df.gt(idx_ser, axis=0) + col_gt = df.gt(col_ser) + idx_le = df.le(idx_ser, axis=0) + col_le = df.le(col_ser) + + tm.assert_frame_equal(col_gt, df > Series(col_ser)) + tm.assert_frame_equal(col_gt, -col_le) + tm.assert_frame_equal(idx_gt, -idx_le) + tm.assert_frame_equal(idx_gt, df.T.gt(idx_ser).T) + + idx_ge = df.ge(idx_ser, axis=0) + col_ge = df.ge(col_ser) + idx_lt = df.lt(idx_ser, axis=0) + col_lt = df.lt(col_ser) + tm.assert_frame_equal(col_ge, df >= Series(col_ser)) + tm.assert_frame_equal(col_ge, -col_lt) + tm.assert_frame_equal(idx_ge, -idx_lt) + tm.assert_frame_equal(idx_ge, df.T.ge(idx_ser).T) idx_ser = Series(np.random.randn(5)) col_ser = Series(np.random.randn(3)) - _test_seq(df, idx_ser, col_ser) - - # list/tuple - _test_seq(df, idx_ser.values, col_ser.values) + def test_bool_flex_frame_na(self): + df = DataFrame(np.random.randn(5, 3)) # NA df.loc[0, 0] = np.nan rs = df.eq(df) @@ -594,16 +604,16 @@ def test_arith_flex_frame_mixed( expected = f(int_frame, 2 * int_frame) tm.assert_frame_equal(result, expected) - def test_arith_flex_frame_raise(self, all_arithmetic_operators, float_frame): + @pytest.mark.parametrize("dim", range(3, 6)) + def test_arith_flex_frame_raise(self, all_arithmetic_operators, float_frame, dim): # one instance of parametrized fixture op = all_arithmetic_operators # Check that arrays with dim >= 3 raise - for dim in range(3, 6): - arr = np.ones((1,) * dim) - msg = "Unable to coerce to Series/DataFrame" - with pytest.raises(ValueError, match=msg): - getattr(float_frame, op)(arr) + arr = np.ones((1,) * dim) + msg = "Unable to coerce to Series/DataFrame" + with pytest.raises(ValueError, match=msg): + getattr(float_frame, op)(arr) def test_arith_flex_frame_corner(self, float_frame): @@ -623,19 +633,23 @@ def test_arith_flex_frame_corner(self, float_frame): with pytest.raises(NotImplementedError, match="fill_value"): float_frame.add(float_frame.iloc[0], axis="index", fill_value=3) - def test_arith_flex_series(self, simple_frame): + @pytest.mark.parametrize("op", ["add", "sub", "mul", "mod"]) + def test_arith_flex_series_ops(self, simple_frame, op): + # after arithmetic refactor, add truediv here df = simple_frame row = df.xs("a") col = df["two"] - # after arithmetic refactor, add truediv here - ops = ["add", "sub", "mul", "mod"] - for op in ops: - f = getattr(df, op) - op = getattr(operator, op) - tm.assert_frame_equal(f(row), op(df, row)) - tm.assert_frame_equal(f(col, axis=0), op(df.T, col).T) + f = getattr(df, op) + op = getattr(operator, op) + tm.assert_frame_equal(f(row), op(df, row)) + tm.assert_frame_equal(f(col, axis=0), op(df.T, col).T) + + def test_arith_flex_series(self, simple_frame): + df = simple_frame + row = df.xs("a") + col = df["two"] # special case for some reason tm.assert_frame_equal(df.add(row, axis=None), df + row) @@ -643,13 +657,10 @@ def test_arith_flex_series(self, simple_frame): tm.assert_frame_equal(df.div(row), df / row) tm.assert_frame_equal(df.div(col, axis=0), (df.T / col).T) + @pytest.mark.parametrize("dtype", ["int64", "float64"]) + def test_arith_flex_series_broadcasting(self, dtype): # broadcasting issue in GH 7325 - df = DataFrame(np.arange(3 * 2).reshape((3, 2)), dtype="int64") - expected = DataFrame([[np.nan, np.inf], [1.0, 1.5], [1.0, 1.25]]) - result = df.div(df[0], axis="index") - tm.assert_frame_equal(result, expected) - - df = DataFrame(np.arange(3 * 2).reshape((3, 2)), dtype="float64") + df = DataFrame(np.arange(3 * 2).reshape((3, 2)), dtype=dtype) expected = DataFrame([[np.nan, np.inf], [1.0, 1.5], [1.0, 1.25]]) result = df.div(df[0], axis="index") tm.assert_frame_equal(result, expected) @@ -1127,7 +1138,8 @@ def test_logical_typeerror_with_non_valid(self, op, res, float_frame): result = getattr(float_frame, op)("foo") assert bool(result.all().all()) is res - def test_binary_ops_align(self): + @pytest.mark.parametrize("op", ["add", "sub", "mul", "div", "truediv"]) + def test_binary_ops_align(self, op): # test aligning binary ops @@ -1144,29 +1156,29 @@ def test_binary_ops_align(self): ).sort_index() idx = pd.IndexSlice - for op in ["add", "sub", "mul", "div", "truediv"]: - opa = getattr(operator, op, None) - if opa is None: - continue + opa = getattr(operator, op, None) + if opa is None: + return - x = Series([1.0, 10.0, 100.0], [1, 2, 3]) - result = getattr(df, op)(x, level="third", axis=0) + x = Series([1.0, 10.0, 100.0], [1, 2, 3]) + result = getattr(df, op)(x, level="third", axis=0) - expected = pd.concat( - [opa(df.loc[idx[:, :, i], :], v) for i, v in x.items()] - ).sort_index() - tm.assert_frame_equal(result, expected) + expected = pd.concat( + [opa(df.loc[idx[:, :, i], :], v) for i, v in x.items()] + ).sort_index() + tm.assert_frame_equal(result, expected) - x = Series([1.0, 10.0], ["two", "three"]) - result = getattr(df, op)(x, level="second", axis=0) + x = Series([1.0, 10.0], ["two", "three"]) + result = getattr(df, op)(x, level="second", axis=0) - expected = ( - pd.concat([opa(df.loc[idx[:, i], :], v) for i, v in x.items()]) - .reindex_like(df) - .sort_index() - ) - tm.assert_frame_equal(result, expected) + expected = ( + pd.concat([opa(df.loc[idx[:, i], :], v) for i, v in x.items()]) + .reindex_like(df) + .sort_index() + ) + tm.assert_frame_equal(result, expected) + def test_binary_ops_align_series_dataframe(self): # GH9463 (alignment level of dataframe with series) midx = MultiIndex.from_product([["A", "B"], ["a", "b"]]) @@ -1368,42 +1380,38 @@ def test_combineFunc(self, float_frame, mixed_float_frame): assert result.index.equals(DataFrame().index) assert len(result.columns) == 0 - def test_comparisons(self, simple_frame, float_frame): + @pytest.mark.parametrize( + "func", + [operator.eq, operator.ne, operator.lt, operator.gt, operator.ge, operator.le], + ) + def test_comparisons(self, simple_frame, float_frame, func): df1 = tm.makeTimeDataFrame() df2 = tm.makeTimeDataFrame() row = simple_frame.xs("a") ndim_5 = np.ones(df1.shape + (1, 1, 1)) - def test_comp(func): - result = func(df1, df2) - tm.assert_numpy_array_equal(result.values, func(df1.values, df2.values)) + result = func(df1, df2) + tm.assert_numpy_array_equal(result.values, func(df1.values, df2.values)) - msg = ( - "Unable to coerce to Series/DataFrame, " - "dimension must be <= 2: (30, 4, 1, 1, 1)" - ) - with pytest.raises(ValueError, match=re.escape(msg)): - func(df1, ndim_5) - - result2 = func(simple_frame, row) - tm.assert_numpy_array_equal( - result2.values, func(simple_frame.values, row.values) - ) + msg = ( + "Unable to coerce to Series/DataFrame, " + "dimension must be <= 2: (30, 4, 1, 1, 1)" + ) + with pytest.raises(ValueError, match=re.escape(msg)): + func(df1, ndim_5) - result3 = func(float_frame, 0) - tm.assert_numpy_array_equal(result3.values, func(float_frame.values, 0)) + result2 = func(simple_frame, row) + tm.assert_numpy_array_equal( + result2.values, func(simple_frame.values, row.values) + ) - msg = "Can only compare identically-labeled DataFrame" - with pytest.raises(ValueError, match=msg): - func(simple_frame, simple_frame[:2]) + result3 = func(float_frame, 0) + tm.assert_numpy_array_equal(result3.values, func(float_frame.values, 0)) - test_comp(operator.eq) - test_comp(operator.ne) - test_comp(operator.lt) - test_comp(operator.gt) - test_comp(operator.ge) - test_comp(operator.le) + msg = "Can only compare identically-labeled DataFrame" + with pytest.raises(ValueError, match=msg): + func(simple_frame, simple_frame[:2]) def test_strings_to_numbers_comparisons_raises(self, compare_operators_no_eq_ne): # GH 11565 @@ -1453,7 +1461,7 @@ def test_boolean_comparison(self): with pytest.raises(ValueError, match=msg1d): # wrong shape - result = df > tup + df > tup # broadcasts like ndarray (GH#23000) result = df > b_r @@ -1474,10 +1482,10 @@ def test_boolean_comparison(self): tm.assert_frame_equal(result, expected) with pytest.raises(ValueError, match=msg1d): - result = df == lst + df == lst with pytest.raises(ValueError, match=msg1d): - result = df == tup + df == tup # broadcasts like ndarray (GH#23000) result = df == b_r @@ -1499,10 +1507,10 @@ def test_boolean_comparison(self): expected.columns = df.columns with pytest.raises(ValueError, match=msg1d): - result = df == lst + df == lst with pytest.raises(ValueError, match=msg1d): - result = df == tup + df == tup def test_inplace_ops_alignment(self): @@ -1650,37 +1658,51 @@ def test_inplace_ops_identity2(self, op): expected = id(df) assert id(df) == expected - def test_alignment_non_pandas(self): + @pytest.mark.parametrize( + "val", + [ + [1, 2, 3], + (1, 2, 3), + np.array([1, 2, 3], dtype=np.int64), + range(1, 4), + ], + ) + def test_alignment_non_pandas(self, val): index = ["A", "B", "C"] columns = ["X", "Y", "Z"] df = DataFrame(np.random.randn(3, 3), index=index, columns=columns) align = pd.core.ops.align_method_FRAME - for val in [ - [1, 2, 3], - (1, 2, 3), - np.array([1, 2, 3], dtype=np.int64), - range(1, 4), - ]: - expected = DataFrame({"X": val, "Y": val, "Z": val}, index=df.index) - tm.assert_frame_equal(align(df, val, "index")[1], expected) + expected = DataFrame({"X": val, "Y": val, "Z": val}, index=df.index) + tm.assert_frame_equal(align(df, val, "index")[1], expected) - expected = DataFrame( - {"X": [1, 1, 1], "Y": [2, 2, 2], "Z": [3, 3, 3]}, index=df.index - ) - tm.assert_frame_equal(align(df, val, "columns")[1], expected) + expected = DataFrame( + {"X": [1, 1, 1], "Y": [2, 2, 2], "Z": [3, 3, 3]}, index=df.index + ) + tm.assert_frame_equal(align(df, val, "columns")[1], expected) + + @pytest.mark.parametrize("val", [[1, 2], (1, 2), np.array([1, 2]), range(1, 3)]) + def test_alignment_non_pandas_length_mismatch(self, val): + index = ["A", "B", "C"] + columns = ["X", "Y", "Z"] + df = DataFrame(np.random.randn(3, 3), index=index, columns=columns) + align = pd.core.ops.align_method_FRAME # length mismatch msg = "Unable to coerce to Series, length must be 3: given 2" - for val in [[1, 2], (1, 2), np.array([1, 2]), range(1, 3)]: + with pytest.raises(ValueError, match=msg): + align(df, val, "index") - with pytest.raises(ValueError, match=msg): - align(df, val, "index") + with pytest.raises(ValueError, match=msg): + align(df, val, "columns") - with pytest.raises(ValueError, match=msg): - align(df, val, "columns") + def test_alignment_non_pandas_index_columns(self): + index = ["A", "B", "C"] + columns = ["X", "Y", "Z"] + df = DataFrame(np.random.randn(3, 3), index=index, columns=columns) + align = pd.core.ops.align_method_FRAME val = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) tm.assert_frame_equal( align(df, val, "index")[1], diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 9675f16ff61ad..475497cf8b20b 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -323,43 +323,34 @@ def test_constructor_list_of_2d_raises(self): with pytest.raises(ValueError, match=r"shape=\(2, 2, 1\)"): DataFrame([a, a]) - def test_constructor_mixed_dtypes(self): - def _make_mixed_dtypes_df(typ, ad=None): - - if typ == "int": - dtypes = MIXED_INT_DTYPES - arrays = [np.array(np.random.rand(10), dtype=d) for d in dtypes] - elif typ == "float": - dtypes = MIXED_FLOAT_DTYPES - arrays = [ - np.array(np.random.randint(10, size=10), dtype=d) for d in dtypes - ] - - for d, a in zip(dtypes, arrays): - assert a.dtype == d - if ad is None: - ad = {} - ad.update({d: a for d, a in zip(dtypes, arrays)}) - return DataFrame(ad) - - def _check_mixed_dtypes(df, dtypes=None): - if dtypes is None: - dtypes = MIXED_FLOAT_DTYPES + MIXED_INT_DTYPES - for d in dtypes: - if d in df: - assert df.dtypes[d] == d - - # mixed floating and integer coexist in the same frame - df = _make_mixed_dtypes_df("float") - _check_mixed_dtypes(df) - - # add lots of types - df = _make_mixed_dtypes_df("float", {"A": 1, "B": "foo", "C": "bar"}) - _check_mixed_dtypes(df) - - # GH 622 - df = _make_mixed_dtypes_df("int") - _check_mixed_dtypes(df) + @pytest.mark.parametrize( + "typ, ad", + [ + # mixed floating and integer coexist in the same frame + ["float", {}], + # add lots of types + ["float", {"A": 1, "B": "foo", "C": "bar"}], + # GH 622 + ["int", {}], + ], + ) + def test_constructor_mixed_dtypes(self, typ, ad): + if typ == "int": + dtypes = MIXED_INT_DTYPES + arrays = [np.array(np.random.rand(10), dtype=d) for d in dtypes] + elif typ == "float": + dtypes = MIXED_FLOAT_DTYPES + arrays = [np.array(np.random.randint(10, size=10), dtype=d) for d in dtypes] + + for d, a in zip(dtypes, arrays): + assert a.dtype == d + ad.update({d: a for d, a in zip(dtypes, arrays)}) + df = DataFrame(ad) + + dtypes = MIXED_FLOAT_DTYPES + MIXED_INT_DTYPES + for d in dtypes: + if d in df: + assert df.dtypes[d] == d def test_constructor_complex_dtypes(self): # GH10952 diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 73d1fbc26a56c..266b0e78e885e 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -714,16 +714,15 @@ def test_index_resolvers_come_after_columns_with_the_same_name(self): expected = df.loc[df.index[df.index > 5]] tm.assert_frame_equal(result, expected) - def test_inf(self): + @pytest.mark.parametrize("op, f", [["==", operator.eq], ["!=", operator.ne]]) + def test_inf(self, op, f): n = 10 df = DataFrame({"a": np.random.rand(n), "b": np.random.rand(n)}) df.loc[::2, 0] = np.inf - d = {"==": operator.eq, "!=": operator.ne} - for op, f in d.items(): - q = f"a {op} inf" - expected = df[f(df.a, np.inf)] - result = df.query(q, engine=self.engine, parser=self.parser) - tm.assert_frame_equal(result, expected) + q = f"a {op} inf" + expected = df[f(df.a, np.inf)] + result = df.query(q, engine=self.engine, parser=self.parser) + tm.assert_frame_equal(result, expected) def test_check_tz_aware_index_query(self, tz_aware_fixture): # https://github.com/pandas-dev/pandas/issues/29463 @@ -1053,18 +1052,24 @@ def test_query_with_nested_special_character(self, parser, engine): expec = df[df.a == "test & test"] tm.assert_frame_equal(res, expec) - def test_query_lex_compare_strings(self, parser, engine): + @pytest.mark.parametrize( + "op, func", + [ + ["<", operator.lt], + [">", operator.gt], + ["<=", operator.le], + [">=", operator.ge], + ], + ) + def test_query_lex_compare_strings(self, parser, engine, op, func): a = Series(np.random.choice(list("abcde"), 20)) b = Series(np.arange(a.size)) df = DataFrame({"X": a, "Y": b}) - ops = {"<": operator.lt, ">": operator.gt, "<=": operator.le, ">=": operator.ge} - - for op, func in ops.items(): - res = df.query(f'X {op} "d"', engine=engine, parser=parser) - expected = df[func(df.X, "d")] - tm.assert_frame_equal(res, expected) + res = df.query(f'X {op} "d"', engine=engine, parser=parser) + expected = df[func(df.X, "d")] + tm.assert_frame_equal(res, expected) def test_query_single_element_booleans(self, parser, engine): columns = "bid", "bidsize", "ask", "asksize" diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 7efcc0ce8556b..19326c185075f 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -601,32 +601,35 @@ def test_unstack_dtypes(self): ) tm.assert_series_equal(result, expected) - # GH7405 - for c, d in ( + @pytest.mark.parametrize( + "c, d", + ( (np.zeros(5), np.zeros(5)), (np.arange(5, dtype="f8"), np.arange(5, 10, dtype="f8")), - ): - - df = DataFrame( - { - "A": ["a"] * 5, - "C": c, - "D": d, - "B": date_range("2012-01-01", periods=5), - } - ) + ), + ) + def test_unstack_dtypes_mixed_date(self, c, d): + # GH7405 + df = DataFrame( + { + "A": ["a"] * 5, + "C": c, + "D": d, + "B": date_range("2012-01-01", periods=5), + } + ) - right = df.iloc[:3].copy(deep=True) + right = df.iloc[:3].copy(deep=True) - df = df.set_index(["A", "B"]) - df["D"] = df["D"].astype("int64") + df = df.set_index(["A", "B"]) + df["D"] = df["D"].astype("int64") - left = df.iloc[:3].unstack(0) - right = right.set_index(["A", "B"]).unstack(0) - right[("D", "a")] = right[("D", "a")].astype("int64") + left = df.iloc[:3].unstack(0) + right = right.set_index(["A", "B"]).unstack(0) + right[("D", "a")] = right[("D", "a")].astype("int64") - assert left.shape == (3, 2) - tm.assert_frame_equal(left, right) + assert left.shape == (3, 2) + tm.assert_frame_equal(left, right) def test_unstack_non_unique_index_names(self): idx = MultiIndex.from_tuples([("a", "b"), ("c", "d")], names=["c1", "c1"]) @@ -662,6 +665,16 @@ def test_unstack_unused_levels(self): tm.assert_frame_equal(result, expected) assert (result.columns.levels[1] == idx.levels[1]).all() + @pytest.mark.parametrize( + "level, idces, col_level, idx_level", + ( + (0, [13, 16, 6, 9, 2, 5, 8, 11], [np.nan, "a", 2], [np.nan, 5, 1]), + (1, [8, 11, 1, 4, 12, 15, 13, 16], [np.nan, 5, 1], [np.nan, "a", 2]), + ), + ) + def test_unstack_unused_levels_mixed_with_nan( + self, level, idces, col_level, idx_level + ): # With mixed dtype and NaN levels = [["a", 2, "c"], [1, 3, 5, 7]] codes = [[0, -1, 1, 1], [0, 2, -1, 2]] @@ -669,17 +682,12 @@ def test_unstack_unused_levels(self): data = np.arange(8) df = DataFrame(data.reshape(4, 2), index=idx) - cases = ( - (0, [13, 16, 6, 9, 2, 5, 8, 11], [np.nan, "a", 2], [np.nan, 5, 1]), - (1, [8, 11, 1, 4, 12, 15, 13, 16], [np.nan, 5, 1], [np.nan, "a", 2]), - ) - for level, idces, col_level, idx_level in cases: - result = df.unstack(level=level) - exp_data = np.zeros(18) * np.nan - exp_data[idces] = data - cols = MultiIndex.from_product([[0, 1], col_level]) - expected = DataFrame(exp_data.reshape(3, 6), index=idx_level, columns=cols) - tm.assert_frame_equal(result, expected) + result = df.unstack(level=level) + exp_data = np.zeros(18) * np.nan + exp_data[idces] = data + cols = MultiIndex.from_product([[0, 1], col_level]) + expected = DataFrame(exp_data.reshape(3, 6), index=idx_level, columns=cols) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("cols", [["A", "C"], slice(None)]) def test_unstack_unused_level(self, cols): @@ -1869,28 +1877,29 @@ def __init__(self, *args, **kwargs): with pytest.raises(Exception, match="Don't compute final result."): df.unstack() - def test_stack_order_with_unsorted_levels(self): + @pytest.mark.parametrize( + "levels", + itertools.chain.from_iterable( + itertools.product(itertools.permutations([0, 1, 2], width), repeat=2) + for width in [2, 3] + ), + ) + @pytest.mark.parametrize("stack_lev", range(2)) + def test_stack_order_with_unsorted_levels(self, levels, stack_lev): # GH#16323 - - def manual_compare_stacked(df, df_stacked, lev0, lev1): - assert all( - df.loc[row, col] == df_stacked.loc[(row, col[lev0]), col[lev1]] - for row in df.index - for col in df.columns - ) - # deep check for 1-row case - for width in [2, 3]: - levels_poss = itertools.product( - itertools.permutations([0, 1, 2], width), repeat=2 - ) + columns = MultiIndex(levels=levels, codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) + df = DataFrame(columns=columns, data=[range(4)]) + df_stacked = df.stack(stack_lev) + assert all( + df.loc[row, col] + == df_stacked.loc[(row, col[stack_lev]), col[1 - stack_lev]] + for row in df.index + for col in df.columns + ) - for levels in levels_poss: - columns = MultiIndex(levels=levels, codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) - df = DataFrame(columns=columns, data=[range(4)]) - for stack_lev in range(2): - df_stacked = df.stack(stack_lev) - manual_compare_stacked(df, df_stacked, stack_lev, 1 - stack_lev) + def test_stack_order_with_unsorted_levels_multi_row(self): + # GH#16323 # check multi-row case mi = MultiIndex( @@ -1900,7 +1909,11 @@ def manual_compare_stacked(df, df_stacked, lev0, lev1): df = DataFrame( columns=mi, index=range(5), data=np.arange(5 * len(mi)).reshape(5, -1) ) - manual_compare_stacked(df, df.stack(0), 0, 1) + assert all( + df.loc[row, col] == df.stack(0).loc[(row, col[0]), col[1]] + for row in df.index + for col in df.columns + ) def test_stack_unstack_unordered_multiindex(self): # GH# 18265