From 21a3b2f90b824ea9d7d5cad3a1048c765b898aed Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <emailformattr@gmail.com>
Date: Mon, 28 Feb 2022 14:27:00 -0800
Subject: [PATCH] DOC: Remove computation.rst in favor of better docstrings
 (#46170)

* DOC: Remove computation.rst in favor of better docstrings

* Remove remaining cross-references to computation.rst
---
 doc/source/user_guide/computation.rst | 212 --------------------------
 doc/source/user_guide/index.rst       |   1 -
 doc/source/user_guide/window.rst      |  14 +-
 doc/source/whatsnew/v0.6.0.rst        |   2 +-
 doc/source/whatsnew/v0.6.1.rst        |   4 +-
 doc/source/whatsnew/v0.8.0.rst        |   2 +-
 pandas/core/frame.py                  |  40 ++++-
 pandas/core/generic.py                |  14 +-
 pandas/core/series.py                 |  10 +-
 9 files changed, 74 insertions(+), 225 deletions(-)
 delete mode 100644 doc/source/user_guide/computation.rst

diff --git a/doc/source/user_guide/computation.rst b/doc/source/user_guide/computation.rst
deleted file mode 100644
index 6007129e96ba0..0000000000000
--- a/doc/source/user_guide/computation.rst
+++ /dev/null
@@ -1,212 +0,0 @@
-.. _computation:
-
-{{ header }}
-
-Computational tools
-===================
-
-
-Statistical functions
----------------------
-
-.. _computation.pct_change:
-
-Percent change
-~~~~~~~~~~~~~~
-
-``Series`` and ``DataFrame`` have a method
-:meth:`~DataFrame.pct_change` to compute the percent change over a given number
-of periods (using ``fill_method`` to fill NA/null values *before* computing
-the percent change).
-
-.. ipython:: python
-
-   ser = pd.Series(np.random.randn(8))
-
-   ser.pct_change()
-
-.. ipython:: python
-
-   df = pd.DataFrame(np.random.randn(10, 4))
-
-   df.pct_change(periods=3)
-
-.. _computation.covariance:
-
-Covariance
-~~~~~~~~~~
-
-:meth:`Series.cov` can be used to compute covariance between series
-(excluding missing values).
-
-.. ipython:: python
-
-   s1 = pd.Series(np.random.randn(1000))
-   s2 = pd.Series(np.random.randn(1000))
-   s1.cov(s2)
-
-Analogously, :meth:`DataFrame.cov` to compute pairwise covariances among the
-series in the DataFrame, also excluding NA/null values.
-
-.. _computation.covariance.caveats:
-
-.. note::
-
-    Assuming the missing data are missing at random this results in an estimate
-    for the covariance matrix which is unbiased. However, for many applications
-    this estimate may not be acceptable because the estimated covariance matrix
-    is not guaranteed to be positive semi-definite. This could lead to
-    estimated correlations having absolute values which are greater than one,
-    and/or a non-invertible covariance matrix. See `Estimation of covariance
-    matrices <https://en.wikipedia.org/w/index.php?title=Estimation_of_covariance_matrices>`_
-    for more details.
-
-.. ipython:: python
-
-   frame = pd.DataFrame(np.random.randn(1000, 5), columns=["a", "b", "c", "d", "e"])
-   frame.cov()
-
-``DataFrame.cov`` also supports an optional ``min_periods`` keyword that
-specifies the required minimum number of observations for each column pair
-in order to have a valid result.
-
-.. ipython:: python
-
-   frame = pd.DataFrame(np.random.randn(20, 3), columns=["a", "b", "c"])
-   frame.loc[frame.index[:5], "a"] = np.nan
-   frame.loc[frame.index[5:10], "b"] = np.nan
-
-   frame.cov()
-
-   frame.cov(min_periods=12)
-
-
-.. _computation.correlation:
-
-Correlation
-~~~~~~~~~~~
-
-Correlation may be computed using the :meth:`~DataFrame.corr` method.
-Using the ``method`` parameter, several methods for computing correlations are
-provided:
-
-.. csv-table::
-    :header: "Method name", "Description"
-    :widths: 20, 80
-
-    ``pearson (default)``, Standard correlation coefficient
-    ``kendall``, Kendall Tau correlation coefficient
-    ``spearman``, Spearman rank correlation coefficient
-
-.. \rho = \cov(x, y) / \sigma_x \sigma_y
-
-All of these are currently computed using pairwise complete observations.
-Wikipedia has articles covering the above correlation coefficients:
-
-* `Pearson correlation coefficient <https://en.wikipedia.org/wiki/Pearson_correlation_coefficient>`_
-* `Kendall rank correlation coefficient <https://en.wikipedia.org/wiki/Kendall_rank_correlation_coefficient>`_
-* `Spearman's rank correlation coefficient <https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient>`_
-
-.. note::
-
-    Please see the :ref:`caveats <computation.covariance.caveats>` associated
-    with this method of calculating correlation matrices in the
-    :ref:`covariance section <computation.covariance>`.
-
-.. ipython:: python
-
-   frame = pd.DataFrame(np.random.randn(1000, 5), columns=["a", "b", "c", "d", "e"])
-   frame.iloc[::2] = np.nan
-
-   # Series with Series
-   frame["a"].corr(frame["b"])
-   frame["a"].corr(frame["b"], method="spearman")
-
-   # Pairwise correlation of DataFrame columns
-   frame.corr()
-
-Note that non-numeric columns will be automatically excluded from the
-correlation calculation.
-
-Like ``cov``, ``corr`` also supports the optional ``min_periods`` keyword:
-
-.. ipython:: python
-
-   frame = pd.DataFrame(np.random.randn(20, 3), columns=["a", "b", "c"])
-   frame.loc[frame.index[:5], "a"] = np.nan
-   frame.loc[frame.index[5:10], "b"] = np.nan
-
-   frame.corr()
-
-   frame.corr(min_periods=12)
-
-
-The ``method`` argument can also be a callable for a generic correlation
-calculation. In this case, it should be a single function
-that produces a single value from two ndarray inputs. Suppose we wanted to
-compute the correlation based on histogram intersection:
-
-.. ipython:: python
-
-   # histogram intersection
-   def histogram_intersection(a, b):
-       return np.minimum(np.true_divide(a, a.sum()), np.true_divide(b, b.sum())).sum()
-
-
-   frame.corr(method=histogram_intersection)
-
-A related method :meth:`~DataFrame.corrwith` is implemented on DataFrame to
-compute the correlation between like-labeled Series contained in different
-DataFrame objects.
-
-.. ipython:: python
-
-   index = ["a", "b", "c", "d", "e"]
-   columns = ["one", "two", "three", "four"]
-   df1 = pd.DataFrame(np.random.randn(5, 4), index=index, columns=columns)
-   df2 = pd.DataFrame(np.random.randn(4, 4), index=index[:4], columns=columns)
-   df1.corrwith(df2)
-   df2.corrwith(df1, axis=1)
-
-.. _computation.ranking:
-
-Data ranking
-~~~~~~~~~~~~
-
-The :meth:`~Series.rank` method produces a data ranking with ties being
-assigned the mean of the ranks (by default) for the group:
-
-.. ipython:: python
-
-   s = pd.Series(np.random.randn(5), index=list("abcde"))
-   s["d"] = s["b"]  # so there's a tie
-   s.rank()
-
-:meth:`~DataFrame.rank` is also a DataFrame method and can rank either the rows
-(``axis=0``) or the columns (``axis=1``). ``NaN`` values are excluded from the
-ranking.
-
-.. ipython:: python
-
-   df = pd.DataFrame(np.random.randn(10, 6))
-   df[4] = df[2][:5]  # some ties
-   df
-   df.rank(1)
-
-``rank`` optionally takes a parameter ``ascending`` which by default is true;
-when false, data is reverse-ranked, with larger values assigned a smaller rank.
-
-``rank`` supports different tie-breaking methods, specified with the ``method``
-parameter:
-
-  - ``average`` : average rank of tied group
-  - ``min`` : lowest rank in the group
-  - ``max`` : highest rank in the group
-  - ``first`` : ranks assigned in the order they appear in the array
-
-.. _computation.windowing:
-
-Windowing functions
-~~~~~~~~~~~~~~~~~~~
-
-See :ref:`the window operations user guide <window.overview>` for an overview of windowing functions.
diff --git a/doc/source/user_guide/index.rst b/doc/source/user_guide/index.rst
index 59c9a9afb7f95..a6392706eb7a3 100644
--- a/doc/source/user_guide/index.rst
+++ b/doc/source/user_guide/index.rst
@@ -76,7 +76,6 @@ Guides
     boolean
     visualization
     style
-    computation
     groupby
     window
     timeseries
diff --git a/doc/source/user_guide/window.rst b/doc/source/user_guide/window.rst
index d1244f62cc1e4..f8c1f89be5d41 100644
--- a/doc/source/user_guide/window.rst
+++ b/doc/source/user_guide/window.rst
@@ -427,10 +427,16 @@ can even be omitted:
 .. note::
 
     Missing values are ignored and each entry is computed using the pairwise
-    complete observations.  Please see the :ref:`covariance section
-    <computation.covariance>` for :ref:`caveats
-    <computation.covariance.caveats>` associated with this method of
-    calculating covariance and correlation matrices.
+    complete observations.
+
+    Assuming the missing data are missing at random, this results in an
+    unbiased estimate of the covariance matrix. However, for many applications
+    this estimate may not be acceptable because the estimated covariance matrix
+    is not guaranteed to be positive semi-definite. This could lead to
+    estimated correlations having absolute values which are greater than one,
+    and/or a non-invertible covariance matrix. See `Estimation of covariance
+    matrices <https://en.wikipedia.org/w/index.php?title=Estimation_of_covariance_matrices>`_
+    for more details.
 
 .. ipython:: python
 
diff --git a/doc/source/whatsnew/v0.6.0.rst b/doc/source/whatsnew/v0.6.0.rst
index 19e2e85c09a87..5ddcd5d90e65c 100644
--- a/doc/source/whatsnew/v0.6.0.rst
+++ b/doc/source/whatsnew/v0.6.0.rst
@@ -24,7 +24,7 @@ New features
 - :ref:`Added <groupby.multiindex>` multiple levels to groupby (:issue:`103`)
 - :ref:`Allow <basics.sorting>` multiple columns in ``by`` argument of ``DataFrame.sort_index`` (:issue:`92`, :issue:`362`)
 - :ref:`Added <indexing.basics.get_value>` fast ``get_value`` and ``put_value`` methods to DataFrame (:issue:`360`)
-- :ref:`Added <computation.covariance>` ``cov`` instance methods to Series and DataFrame (:issue:`194`, :issue:`362`)
+- Added ``cov`` instance methods to Series and DataFrame (:issue:`194`, :issue:`362`)
 - :ref:`Added <visualization.barplot>` ``kind='bar'`` option to ``DataFrame.plot`` (:issue:`348`)
 - :ref:`Added <basics.idxmin>` ``idxmin`` and ``idxmax`` to Series and DataFrame (:issue:`286`)
 - :ref:`Added <io.clipboard>` ``read_clipboard`` function to parse DataFrame from clipboard (:issue:`300`)
diff --git a/doc/source/whatsnew/v0.6.1.rst b/doc/source/whatsnew/v0.6.1.rst
index 4e72a630ad9f1..58a7d1ee13278 100644
--- a/doc/source/whatsnew/v0.6.1.rst
+++ b/doc/source/whatsnew/v0.6.1.rst
@@ -7,7 +7,7 @@ Version 0.6.1 (December 13, 2011)
 New features
 ~~~~~~~~~~~~
 - Can append single rows (as Series) to a DataFrame
-- Add Spearman and Kendall rank :ref:`correlation <computation.correlation>`
+- Add Spearman and Kendall rank correlation
   options to Series.corr and DataFrame.corr (:issue:`428`)
 - :ref:`Added <indexing.basics.get_value>` ``get_value`` and ``set_value`` methods to
   Series, DataFrame, and Panel for very low-overhead access (>2x faster in many
@@ -19,7 +19,7 @@ New features
 - Implement new :ref:`SparseArray <sparse.array>` and ``SparseList``
   data structures. SparseSeries now derives from SparseArray (:issue:`463`)
 - :ref:`Better console printing options <basics.console_output>` (:issue:`453`)
-- Implement fast :ref:`data ranking <computation.ranking>` for Series and
+- Implement fast data ranking for Series and
   DataFrame, fast versions of scipy.stats.rankdata (:issue:`428`)
 - Implement ``DataFrame.from_items`` alternate
   constructor (:issue:`444`)
diff --git a/doc/source/whatsnew/v0.8.0.rst b/doc/source/whatsnew/v0.8.0.rst
index 490175914cef1..ce02525a69ace 100644
--- a/doc/source/whatsnew/v0.8.0.rst
+++ b/doc/source/whatsnew/v0.8.0.rst
@@ -145,7 +145,7 @@ Other new features
 - Add :ref:`'kde' <visualization.kde>` plot option for density plots
 - Support for converting DataFrame to R data.frame through rpy2
 - Improved support for complex numbers in Series and DataFrame
-- Add :ref:`pct_change <computation.pct_change>` method to all data structures
+- Add ``pct_change`` method to all data structures
 - Add max_colwidth configuration option for DataFrame console output
 - :ref:`Interpolate <missing_data.interpolate>` Series values using index values
 - Can select multiple columns from GroupBy
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index cf1988808bbb0..9d17827d55951 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -9592,6 +9592,14 @@ def corr(
             DataFrame or Series.
         Series.corr : Compute the correlation between two Series.
 
+        Notes
+        -----
+        Pearson, Kendall and Spearman correlation are currently computed using pairwise complete observations.
+
+        * `Pearson correlation coefficient <https://en.wikipedia.org/wiki/Pearson_correlation_coefficient>`_
+        * `Kendall rank correlation coefficient <https://en.wikipedia.org/wiki/Kendall_rank_correlation_coefficient>`_
+        * `Spearman's rank correlation coefficient <https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient>`_
+
         Examples
         --------
         >>> def histogram_intersection(a, b):
@@ -9603,7 +9611,14 @@ def corr(
               dogs  cats
         dogs   1.0   0.3
         cats   0.3   1.0
-        """
+
+        >>> df = pd.DataFrame([(1, 1), (2, np.nan), (np.nan, 3), (4, 4)],
+        ...                   columns=['dogs', 'cats'])
+        >>> df.corr(min_periods=3)
+              dogs  cats
+        dogs   1.0   NaN
+        cats   NaN   1.0
+        """  # noqa:E501
         numeric_df = self._get_numeric_data()
         cols = numeric_df.columns
         idx = cols.copy()
@@ -9797,7 +9812,28 @@ def corrwith(self, other, axis: Axis = 0, drop=False, method="pearson") -> Serie
         See Also
         --------
         DataFrame.corr : Compute pairwise correlation of columns.
-        """
+
+        Examples
+        --------
+        >>> index = ["a", "b", "c", "d", "e"]
+        >>> columns = ["one", "two", "three", "four"]
+        >>> df1 = pd.DataFrame(np.arange(20).reshape(5, 4), index=index, columns=columns)
+        >>> df2 = pd.DataFrame(np.arange(16).reshape(4, 4), index=index[:4], columns=columns)
+        >>> df1.corrwith(df2)
+        one      1.0
+        two      1.0
+        three    1.0
+        four     1.0
+        dtype: float64
+
+        >>> df2.corrwith(df1, axis=1)
+        a    1.0
+        b    1.0
+        c    1.0
+        d    1.0
+        e    NaN
+        dtype: float64
+        """  # noqa:E501
         axis = self._get_axis_number(axis)
         this = self._get_numeric_data()
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 9a9697b201b43..fc682b848b054 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -8522,6 +8522,18 @@ def rank(
         3   spider          8.0
         4    snake          NaN
 
+        By default, tied values are assigned the mean of the ranks for their group.
+
+        >>> s = pd.Series(range(5), index=list("abcde"))
+        >>> s["d"] = s["b"]
+        >>> s.rank()
+        a    1.0
+        b    2.5
+        c    4.0
+        d    2.5
+        e    5.0
+        dtype: float64
+
         The following example shows how the method behaves with the above
         parameters:
 
@@ -10251,7 +10263,7 @@ def pct_change(
         periods : int, default 1
             Periods to shift for forming percent change.
         fill_method : str, default 'pad'
-            How to handle NAs before computing percent changes.
+            How to handle NAs **before** computing percent changes.
         limit : int, default None
             The number of consecutive NAs to fill before stopping.
         freq : DateOffset, timedelta, or str, optional
diff --git a/pandas/core/series.py b/pandas/core/series.py
index e565e124ac7f9..78f353ff8c70c 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2566,6 +2566,14 @@ def corr(self, other, method="pearson", min_periods=None) -> float:
         DataFrame.corrwith : Compute pairwise correlation with another
             DataFrame or Series.
 
+        Notes
+        -----
+        Pearson, Kendall and Spearman correlation are currently computed using pairwise complete observations.
+
+        * `Pearson correlation coefficient <https://en.wikipedia.org/wiki/Pearson_correlation_coefficient>`_
+        * `Kendall rank correlation coefficient <https://en.wikipedia.org/wiki/Kendall_rank_correlation_coefficient>`_
+        * `Spearman's rank correlation coefficient <https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient>`_
+
         Examples
         --------
         >>> def histogram_intersection(a, b):
@@ -2575,7 +2583,7 @@ def corr(self, other, method="pearson", min_periods=None) -> float:
         >>> s2 = pd.Series([.3, .6, .0, .1])
         >>> s1.corr(s2, method=histogram_intersection)
         0.3
-        """
+        """  # noqa:E501
         this, other = self.align(other, join="inner", copy=False)
         if len(this) == 0:
             return np.nan