From ca67b98b379c04aa8ef10ae68ab8380580307401 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Morales?= <jmoralz92@gmail.com>
Date: Tue, 12 Nov 2024 12:03:55 -0600
Subject: [PATCH] breaking: drop rows with null targets when `dropna=False`
 (#447)

---
 mlforecast/core.py                            |  23 +-
 nbs/core.ipynb                                |  63 +++---
 .../quick_start_distributed.ipynb             | 207 +++++++++---------
 nbs/target_transforms.ipynb                   |   3 +-
 4 files changed, 162 insertions(+), 134 deletions(-)

diff --git a/mlforecast/core.py b/mlforecast/core.py
index 846330e3..6357ab07 100644
--- a/mlforecast/core.py
+++ b/mlforecast/core.py
@@ -402,16 +402,23 @@ def _transform(
             target = target[self._restore_idxs]
 
         # determine rows to keep
+        target_nulls = np.isnan(target)
+        if target_nulls.ndim == 2:
+            # target nulls for each horizon are dropped in MLForecast.fit_models
+            # we just drop rows here for which all the target values are null
+            target_nulls = target_nulls.all(axis=1)
         if dropna:
             feature_nulls = np.full(df.shape[0], False)
             for feature_vals in features.values():
                 feature_nulls |= np.isnan(feature_vals)
-            target_nulls = np.isnan(target)
-            if target_nulls.ndim == 2:
-                # target nulls for each horizon are dropped in MLForecast.fit_models
-                # we just drop rows here for which all the target values are null
-                target_nulls = target_nulls.all(axis=1)
             keep_rows = ~(feature_nulls | target_nulls)
+        else:
+            # we always want to drop rows with nulls in the target
+            keep_rows = ~target_nulls
+
+        self._dropped_series: Optional[np.ndarray] = None
+        if not keep_rows.all():
+            # remove rows with nulls
             for k, v in features.items():
                 features[k] = v[keep_rows]
             target = target[keep_rows]
@@ -422,7 +429,7 @@ def _transform(
                 last_idxs = self._sort_idxs[last_idxs]
             last_vals_nan = ~keep_rows[last_idxs]
             if last_vals_nan.any():
-                self._dropped_series: Optional[np.ndarray] = np.where(last_vals_nan)[0]
+                self._dropped_series = np.where(last_vals_nan)[0]
                 dropped_ids = reprlib.repr(list(self.uids[self._dropped_series]))
                 warnings.warn(
                     "The following series were dropped completely "
@@ -430,11 +437,9 @@ def _transform(
                     "These series won't show up if you use `MLForecast.forecast_fitted_values()`.\n"
                     "You can set `dropna=False` or use transformations that require less samples to mitigate this"
                 )
-            else:
-                self._dropped_series = None
         elif isinstance(df, pd.DataFrame):
+            # we'll be assigning columns below, so we need to copy
             df = df.copy(deep=False)
-            self._dropped_series = None
 
         # once we've computed the features and target we can slice the series
         update_samples = [
diff --git a/nbs/core.ipynb b/nbs/core.ipynb
index df3e198c..96a58a49 100644
--- a/nbs/core.ipynb
+++ b/nbs/core.ipynb
@@ -20,16 +20,7 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "The autoreload extension is already loaded. To reload it, use:\n",
-      "  %reload_ext autoreload\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "#|hide\n",
     "%load_ext autoreload\n",
@@ -895,16 +886,23 @@
     "            target = target[self._restore_idxs]       \n",
     "\n",
     "        # determine rows to keep\n",
+    "        target_nulls = np.isnan(target)\n",
+    "        if target_nulls.ndim == 2:\n",
+    "            # target nulls for each horizon are dropped in MLForecast.fit_models\n",
+    "            # we just drop rows here for which all the target values are null\n",
+    "            target_nulls = target_nulls.all(axis=1)\n",
     "        if dropna:\n",
     "            feature_nulls = np.full(df.shape[0], False)\n",
     "            for feature_vals in features.values():\n",
     "                feature_nulls |= np.isnan(feature_vals)\n",
-    "            target_nulls = np.isnan(target)\n",
-    "            if target_nulls.ndim == 2:\n",
-    "                # target nulls for each horizon are dropped in MLForecast.fit_models\n",
-    "                # we just drop rows here for which all the target values are null\n",
-    "                target_nulls = target_nulls.all(axis=1)\n",
     "            keep_rows = ~(feature_nulls | target_nulls)\n",
+    "        else:\n",
+    "            # we always want to drop rows with nulls in the target\n",
+    "            keep_rows = ~target_nulls\n",
+    "\n",
+    "        self._dropped_series: Optional[np.ndarray] = None\n",
+    "        if not keep_rows.all():\n",
+    "            # remove rows with nulls\n",
     "            for k, v in features.items():\n",
     "                features[k] = v[keep_rows]\n",
     "            target = target[keep_rows]\n",
@@ -915,7 +913,7 @@
     "                last_idxs = self._sort_idxs[last_idxs]\n",
     "            last_vals_nan = ~keep_rows[last_idxs]\n",
     "            if last_vals_nan.any():\n",
-    "                self._dropped_series: Optional[np.ndarray] = np.where(last_vals_nan)[0]                \n",
+    "                self._dropped_series = np.where(last_vals_nan)[0]                \n",
     "                dropped_ids = reprlib.repr(list(self.uids[self._dropped_series]))\n",
     "                warnings.warn(\n",
     "                    \"The following series were dropped completely \"\n",
@@ -923,11 +921,9 @@
     "                    \"These series won't show up if you use `MLForecast.forecast_fitted_values()`.\\n\"\n",
     "                    \"You can set `dropna=False` or use transformations that require less samples to mitigate this\"\n",
     "                )\n",
-    "            else:\n",
-    "                self._dropped_series = None\n",
     "        elif isinstance(df, pd.DataFrame):\n",
+    "            # we'll be assigning columns below, so we need to copy\n",
     "            df = df.copy(deep=False)\n",
-    "            self._dropped_series = None\n",
     "\n",
     "        # once we've computed the features and target we can slice the series\n",
     "        update_samples = [\n",
@@ -1701,7 +1697,7 @@
       "text/markdown": [
        "---\n",
        "\n",
-       "[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/core.py#L496){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
+       "[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/core.py#L511){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
        "\n",
        "## TimeSeries.fit_transform\n",
        "\n",
@@ -1723,7 +1719,7 @@
       "text/plain": [
        "---\n",
        "\n",
-       "[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/core.py#L496){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
+       "[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/core.py#L511){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
        "\n",
        "## TimeSeries.fit_transform\n",
        "\n",
@@ -2016,7 +2012,7 @@
       "text/markdown": [
        "---\n",
        "\n",
-       "[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/core.py#L743){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
+       "[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/core.py#L758){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
        "\n",
        "## TimeSeries.predict\n",
        "\n",
@@ -2030,7 +2026,7 @@
       "text/plain": [
        "---\n",
        "\n",
-       "[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/core.py#L743){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
+       "[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/core.py#L758){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
        "\n",
        "## TimeSeries.predict\n",
        "\n",
@@ -2168,7 +2164,7 @@
       "text/markdown": [
        "---\n",
        "\n",
-       "[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/core.py#L848){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
+       "[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/core.py#L863){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
        "\n",
        "## TimeSeries.update\n",
        "\n",
@@ -2181,7 +2177,7 @@
       "text/plain": [
        "---\n",
        "\n",
-       "[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/core.py#L848){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
+       "[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/core.py#L863){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
        "\n",
        "## TimeSeries.update\n",
        "\n",
@@ -2575,6 +2571,23 @@
     "ts.fit_transform(series, 'unique_id', 'ds', 'y')\n",
     "assert ts.keep_last_n is None"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| hide\n",
+    "# no target nulls when dropna=False\n",
+    "ts = TimeSeries(\n",
+    "    freq='D',\n",
+    "    lags=[1, 2],\n",
+    "    target_transforms=[Differences([5])],\n",
+    ")\n",
+    "prep = ts.fit_transform(series, 'unique_id', 'ds', 'y', dropna=False)\n",
+    "assert not prep['y'].isnull().any()"
+   ]
   }
  ],
  "metadata": {
diff --git a/nbs/docs/getting-started/quick_start_distributed.ipynb b/nbs/docs/getting-started/quick_start_distributed.ipynb
index 6f355810..4330ffdf 100644
--- a/nbs/docs/getting-started/quick_start_distributed.ipynb
+++ b/nbs/docs/getting-started/quick_start_distributed.ipynb
@@ -448,36 +448,36 @@
        "      <th>0</th>\n",
        "      <td>id_00</td>\n",
        "      <td>2002-09-27 00:00:00</td>\n",
-       "      <td>21.609526</td>\n",
-       "      <td>22.114111</td>\n",
+       "      <td>22.267619</td>\n",
+       "      <td>21.835798</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td>id_00</td>\n",
        "      <td>2002-09-28 00:00:00</td>\n",
-       "      <td>85.623013</td>\n",
-       "      <td>84.309696</td>\n",
+       "      <td>85.230055</td>\n",
+       "      <td>83.996424</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td>id_00</td>\n",
        "      <td>2002-09-29 00:00:00</td>\n",
-       "      <td>163.107685</td>\n",
-       "      <td>163.20679</td>\n",
+       "      <td>168.256154</td>\n",
+       "      <td>163.076652</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td>id_00</td>\n",
        "      <td>2002-09-30 00:00:00</td>\n",
-       "      <td>246.96872</td>\n",
-       "      <td>245.510858</td>\n",
+       "      <td>246.712244</td>\n",
+       "      <td>245.827467</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
        "      <td>id_00</td>\n",
        "      <td>2002-10-01 00:00:00</td>\n",
-       "      <td>318.521367</td>\n",
-       "      <td>314.479718</td>\n",
+       "      <td>314.184225</td>\n",
+       "      <td>315.257849</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -485,11 +485,11 @@
       ],
       "text/plain": [
        "  unique_id                   ds  DaskXGBForecast  DaskLGBMForecast\n",
-       "0     id_00  2002-09-27 00:00:00        21.609526         22.114111\n",
-       "1     id_00  2002-09-28 00:00:00        85.623013         84.309696\n",
-       "2     id_00  2002-09-29 00:00:00       163.107685         163.20679\n",
-       "3     id_00  2002-09-30 00:00:00        246.96872        245.510858\n",
-       "4     id_00  2002-10-01 00:00:00       318.521367        314.479718"
+       "0     id_00  2002-09-27 00:00:00        22.267619         21.835798\n",
+       "1     id_00  2002-09-28 00:00:00        85.230055         83.996424\n",
+       "2     id_00  2002-09-29 00:00:00       168.256154        163.076652\n",
+       "3     id_00  2002-09-30 00:00:00       246.712244        245.827467\n",
+       "4     id_00  2002-10-01 00:00:00       314.184225        315.257849"
       ]
      },
      "execution_count": null,
@@ -792,68 +792,68 @@
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>id_00</td>\n",
-       "      <td>2002-08-16 00:00:00</td>\n",
-       "      <td>23.192749</td>\n",
-       "      <td>21.986437</td>\n",
+       "      <th>61</th>\n",
+       "      <td>id_04</td>\n",
+       "      <td>2002-08-21 00:00:00</td>\n",
+       "      <td>68.3418</td>\n",
+       "      <td>68.944539</td>\n",
        "      <td>2002-08-15 00:00:00</td>\n",
-       "      <td>11.878591</td>\n",
+       "      <td>69.699857</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>30</th>\n",
-       "      <td>id_02</td>\n",
-       "      <td>2002-08-18 00:00:00</td>\n",
-       "      <td>96.59974</td>\n",
-       "      <td>96.568057</td>\n",
+       "      <th>83</th>\n",
+       "      <td>id_15</td>\n",
+       "      <td>2002-08-29 00:00:00</td>\n",
+       "      <td>199.315403</td>\n",
+       "      <td>199.663555</td>\n",
        "      <td>2002-08-15 00:00:00</td>\n",
-       "      <td>94.706551</td>\n",
+       "      <td>206.082864</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>80</th>\n",
-       "      <td>id_05</td>\n",
-       "      <td>2002-08-26 00:00:00</td>\n",
-       "      <td>257.210466</td>\n",
-       "      <td>255.908309</td>\n",
+       "      <th>103</th>\n",
+       "      <td>id_17</td>\n",
+       "      <td>2002-08-21 00:00:00</td>\n",
+       "      <td>156.822598</td>\n",
+       "      <td>158.018246</td>\n",
        "      <td>2002-08-15 00:00:00</td>\n",
-       "      <td>246.051086</td>\n",
+       "      <td>152.227984</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>36</th>\n",
-       "      <td>id_12</td>\n",
-       "      <td>2002-08-24 00:00:00</td>\n",
-       "      <td>401.081335</td>\n",
-       "      <td>401.697836</td>\n",
+       "      <th>61</th>\n",
+       "      <td>id_24</td>\n",
+       "      <td>2002-08-21 00:00:00</td>\n",
+       "      <td>136.598356</td>\n",
+       "      <td>136.576865</td>\n",
        "      <td>2002-08-15 00:00:00</td>\n",
-       "      <td>424.296882</td>\n",
+       "      <td>138.559945</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>91</th>\n",
-       "      <td>id_16</td>\n",
-       "      <td>2002-08-23 00:00:00</td>\n",
-       "      <td>315.036479</td>\n",
-       "      <td>315.368377</td>\n",
+       "      <th>36</th>\n",
+       "      <td>id_33</td>\n",
+       "      <td>2002-08-24 00:00:00</td>\n",
+       "      <td>95.6072</td>\n",
+       "      <td>96.249354</td>\n",
        "      <td>2002-08-15 00:00:00</td>\n",
-       "      <td>300.419406</td>\n",
+       "      <td>102.068997</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "   unique_id                   ds  DaskXGBForecast  DaskLGBMForecast  \\\n",
-       "0      id_00  2002-08-16 00:00:00        23.192749         21.986437   \n",
-       "30     id_02  2002-08-18 00:00:00         96.59974         96.568057   \n",
-       "80     id_05  2002-08-26 00:00:00       257.210466        255.908309   \n",
-       "36     id_12  2002-08-24 00:00:00       401.081335        401.697836   \n",
-       "91     id_16  2002-08-23 00:00:00       315.036479        315.368377   \n",
+       "    unique_id                   ds  DaskXGBForecast  DaskLGBMForecast  \\\n",
+       "61      id_04  2002-08-21 00:00:00          68.3418         68.944539   \n",
+       "83      id_15  2002-08-29 00:00:00       199.315403        199.663555   \n",
+       "103     id_17  2002-08-21 00:00:00       156.822598        158.018246   \n",
+       "61      id_24  2002-08-21 00:00:00       136.598356        136.576865   \n",
+       "36      id_33  2002-08-24 00:00:00          95.6072         96.249354   \n",
        "\n",
-       "                 cutoff           y  \n",
-       "0   2002-08-15 00:00:00   11.878591  \n",
-       "30  2002-08-15 00:00:00   94.706551  \n",
-       "80  2002-08-15 00:00:00  246.051086  \n",
-       "36  2002-08-15 00:00:00  424.296882  \n",
-       "91  2002-08-15 00:00:00  300.419406  "
+       "                  cutoff           y  \n",
+       "61   2002-08-15 00:00:00   69.699857  \n",
+       "83   2002-08-15 00:00:00  206.082864  \n",
+       "103  2002-08-15 00:00:00  152.227984  \n",
+       "61   2002-08-15 00:00:00  138.559945  \n",
+       "36   2002-08-15 00:00:00  102.068997  "
       ]
      },
      "execution_count": null,
@@ -918,7 +918,8 @@
     "    ),\n",
     "    static_features=['static_0', 'static_1'],\n",
     ")\n",
-    "assert reduced_train.groupby('unique_id').size().compute().max() == input_size"
+    "dropped_samples = fcst._base_ts.target_transforms[0].differences[0]\n",
+    "assert reduced_train.groupby('unique_id').size().compute().max() == input_size - dropped_samples"
    ]
   },
   {
@@ -1183,7 +1184,15 @@
    "execution_count": null,
    "id": "d06d2230-60f5-47f1-820b-af2ca7311b41",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "                                                                                \r"
+     ]
+    }
+   ],
    "source": [
     "preds = fcst.predict(7, X_df=future).toPandas()"
    ]
@@ -1226,35 +1235,35 @@
        "      <th>0</th>\n",
        "      <td>id_00</td>\n",
        "      <td>2002-09-27</td>\n",
-       "      <td>15.102403</td>\n",
+       "      <td>15.053577</td>\n",
        "      <td>18.631477</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td>id_00</td>\n",
        "      <td>2002-09-28</td>\n",
-       "      <td>92.980261</td>\n",
+       "      <td>93.010037</td>\n",
        "      <td>93.796269</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td>id_00</td>\n",
        "      <td>2002-09-29</td>\n",
-       "      <td>160.090375</td>\n",
+       "      <td>160.120148</td>\n",
        "      <td>159.582315</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td>id_00</td>\n",
        "      <td>2002-09-30</td>\n",
-       "      <td>250.416113</td>\n",
+       "      <td>250.445885</td>\n",
        "      <td>250.861651</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
        "      <td>id_00</td>\n",
        "      <td>2002-10-01</td>\n",
-       "      <td>323.306184</td>\n",
+       "      <td>323.335956</td>\n",
        "      <td>321.564089</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
@@ -1263,11 +1272,11 @@
       ],
       "text/plain": [
        "  unique_id         ds  SparkLGBMForecast  SparkXGBForecast\n",
-       "0     id_00 2002-09-27          15.102403         18.631477\n",
-       "1     id_00 2002-09-28          92.980261         93.796269\n",
-       "2     id_00 2002-09-29         160.090375        159.582315\n",
-       "3     id_00 2002-09-30         250.416113        250.861651\n",
-       "4     id_00 2002-10-01         323.306184        321.564089"
+       "0     id_00 2002-09-27          15.053577         18.631477\n",
+       "1     id_00 2002-09-28          93.010037         93.796269\n",
+       "2     id_00 2002-09-29         160.120148        159.582315\n",
+       "3     id_00 2002-09-30         250.445885        250.861651\n",
+       "4     id_00 2002-10-01         323.335956        321.564089"
       ]
      },
      "execution_count": null,
@@ -2008,48 +2017,48 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>id_04</td>\n",
-       "      <td>2002-09-20</td>\n",
-       "      <td>118.982094</td>\n",
-       "      <td>117.577477</td>\n",
+       "      <td>id_05</td>\n",
+       "      <td>2002-09-21</td>\n",
+       "      <td>108.285187</td>\n",
+       "      <td>108.619698</td>\n",
        "      <td>2002-09-12</td>\n",
-       "      <td>118.603489</td>\n",
+       "      <td>108.726387</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>id_04</td>\n",
-       "      <td>2002-09-24</td>\n",
-       "      <td>51.461491</td>\n",
-       "      <td>50.120552</td>\n",
+       "      <td>id_08</td>\n",
+       "      <td>2002-09-16</td>\n",
+       "      <td>26.287956</td>\n",
+       "      <td>26.589603</td>\n",
        "      <td>2002-09-12</td>\n",
-       "      <td>52.668389</td>\n",
+       "      <td>27.980670</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>id_05</td>\n",
-       "      <td>2002-09-20</td>\n",
-       "      <td>27.594826</td>\n",
-       "      <td>24.421537</td>\n",
+       "      <td>id_08</td>\n",
+       "      <td>2002-09-25</td>\n",
+       "      <td>83.210945</td>\n",
+       "      <td>84.194962</td>\n",
        "      <td>2002-09-12</td>\n",
-       "      <td>20.120710</td>\n",
+       "      <td>86.344885</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
-       "      <td>id_05</td>\n",
-       "      <td>2002-09-25</td>\n",
-       "      <td>411.615204</td>\n",
-       "      <td>412.093384</td>\n",
+       "      <td>id_11</td>\n",
+       "      <td>2002-09-22</td>\n",
+       "      <td>416.994843</td>\n",
+       "      <td>417.106506</td>\n",
        "      <td>2002-09-12</td>\n",
-       "      <td>419.621422</td>\n",
+       "      <td>425.434661</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
-       "      <td>id_08</td>\n",
-       "      <td>2002-09-25</td>\n",
-       "      <td>83.210945</td>\n",
-       "      <td>83.842705</td>\n",
+       "      <td>id_16</td>\n",
+       "      <td>2002-09-14</td>\n",
+       "      <td>377.916382</td>\n",
+       "      <td>375.421600</td>\n",
        "      <td>2002-09-12</td>\n",
-       "      <td>86.344885</td>\n",
+       "      <td>400.361977</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -2057,11 +2066,11 @@
       ],
       "text/plain": [
        "  unique_id         ds  RayLGBMForecast  RayXGBForecast     cutoff           y\n",
-       "0     id_04 2002-09-20       118.982094      117.577477 2002-09-12  118.603489\n",
-       "1     id_04 2002-09-24        51.461491       50.120552 2002-09-12   52.668389\n",
-       "2     id_05 2002-09-20        27.594826       24.421537 2002-09-12   20.120710\n",
-       "3     id_05 2002-09-25       411.615204      412.093384 2002-09-12  419.621422\n",
-       "4     id_08 2002-09-25        83.210945       83.842705 2002-09-12   86.344885"
+       "0     id_05 2002-09-21       108.285187      108.619698 2002-09-12  108.726387\n",
+       "1     id_08 2002-09-16        26.287956       26.589603 2002-09-12   27.980670\n",
+       "2     id_08 2002-09-25        83.210945       84.194962 2002-09-12   86.344885\n",
+       "3     id_11 2002-09-22       416.994843      417.106506 2002-09-12  425.434661\n",
+       "4     id_16 2002-09-14       377.916382      375.421600 2002-09-12  400.361977"
       ]
      },
      "execution_count": null,
diff --git a/nbs/target_transforms.ipynb b/nbs/target_transforms.ipynb
index 589b9c57..b6163f55 100644
--- a/nbs/target_transforms.ipynb
+++ b/nbs/target_transforms.ipynb
@@ -732,6 +732,7 @@
     "        sk_boxcox.fit_transform(series[['y']])[:, 0], index=series['unique_id']\n",
     "    ).groupby('unique_id', observed=True)\n",
     "    .diff()\n",
+    "    .dropna()\n",
     "    .values\n",
     ")\n",
     "np.testing.assert_allclose(prep['y'].values, expected)"
@@ -754,7 +755,7 @@
     "    target_transforms=[boxcox_global, single_difference]\n",
     ")\n",
     "prep_pl = fcst_pl.preprocess(series_pl, dropna=False)\n",
-    "pd.testing.assert_frame_equal(prep, prep_pl.to_pandas())"
+    "pd.testing.assert_frame_equal(prep.reset_index(drop=True), prep_pl.to_pandas())"
    ]
   }
  ],