From 7c786afe6902799ca39ffca6e94e254568c27165 Mon Sep 17 00:00:00 2001
From: sami bh <sami.boumaiza@ensta-paris.fr>
Date: Fri, 22 Dec 2023 18:27:27 +0100
Subject: [PATCH] UP my solution

---
 sklearn_questions.py | 61 +++++++++++++++++++++++++++++++++++---------
 1 file changed, 49 insertions(+), 12 deletions(-)

diff --git a/sklearn_questions.py b/sklearn_questions.py
index fa02e0d..cf9e575 100644
--- a/sklearn_questions.py
+++ b/sklearn_questions.py
@@ -49,6 +49,7 @@
 """
 import numpy as np
 import pandas as pd
+from pandas.api.types import is_datetime64_any_dtype
 
 from sklearn.base import BaseEstimator
 from sklearn.base import ClassifierMixin
@@ -82,6 +83,12 @@ def fit(self, X, y):
         self : instance of KNearestNeighbors
             The current instance of the classifier
         """
+        X, y = check_X_y(X, y)
+        check_classification_targets(y)
+        self.n_features_in_ = X.shape[1]
+        self.classes_ = np.unique(y)
+        self.X_ = X
+        self.y_ = y
         return self
 
     def predict(self, X):
@@ -97,8 +104,21 @@ def predict(self, X):
         y : ndarray, shape (n_test_samples,)
             Predicted class labels for each test data sample.
         """
-        y_pred = np.zeros(X.shape[0])
-        return y_pred
+        check_is_fitted(self)
+        X = check_array(X)
+        distances = pairwise_distances(X, self.X_, metric="euclidean")
+        closest = np.argsort(distances, axis=1)[:, : self.n_neighbors]
+        classes = self.y_[closest]
+        y_pred = []
+        for row in range(classes.shape[0]):
+            _, idx, counts = np.unique(
+                classes[row], return_index=True, return_counts=True
+            )
+            index = idx[np.argmax(counts)]
+            prediction = classes[row][index]
+            y_pred.append(prediction)
+
+        return np.array(y_pred)
 
     def score(self, X, y):
         """Calculate the score of the prediction.
@@ -115,7 +135,8 @@ def score(self, X, y):
         score : float
             Accuracy of the model computed for the (X, y) pairs.
         """
-        return 0.
+        y_pred = self.predict(X)
+        return np.mean(y_pred == y)
 
 
 class MonthlySplit(BaseCrossValidator):
@@ -134,7 +155,7 @@ class MonthlySplit(BaseCrossValidator):
         To use the index as column just set `time_col` to `'index'`.
     """
 
-    def __init__(self, time_col='index'):  # noqa: D107
+    def __init__(self, time_col="index"):  # noqa: D107
         self.time_col = time_col
 
     def get_n_splits(self, X, y=None, groups=None):
@@ -155,7 +176,21 @@ def get_n_splits(self, X, y=None, groups=None):
         n_splits : int
             The number of splits.
         """
-        return 0
+        _X = pd.DataFrame(X)
+        if "index" not in _X.columns:
+            _X = _X.reset_index(names="index")
+        if not is_datetime64_any_dtype(_X[self.time_col]):
+            raise ValueError(
+                f"Column {self.time_col} is not a datetime column."
+            )
+        max_date = _X[self.time_col].max()
+        min_date = _X[self.time_col].min()
+        res = (
+            12 * (max_date.year - min_date.year)
+            + max_date.month
+            - min_date.month
+        )
+        return res
 
     def split(self, X, y, groups=None):
         """Generate indices to split data into training and test set.
@@ -178,11 +213,13 @@ def split(self, X, y, groups=None):
             The testing set indices for that split.
         """
 
-        n_samples = X.shape[0]
-        n_splits = self.get_n_splits(X, y, groups)
+        X_copy = X.reset_index()
+        n_splits = self.get_n_splits(X_copy, y, groups)
+        X_grouped = X_copy.sort_values(by=self.time_col).groupby(
+            pd.Grouper(key=self.time_col, freq="M")
+        )
+        idxs = [group.index for _, group in X_grouped]
         for i in range(n_splits):
-            idx_train = range(n_samples)
-            idx_test = range(n_samples)
-            yield (
-                idx_train, idx_test
-            )
+            idx_train = list(idxs[i])
+            idx_test = list(idxs[i + 1])
+            yield (idx_train, idx_test)