UP my solution

x-datascience-datacamp · Dec 21, 2023 · 3d08a92 · 3d08a92
1 parent 6ccb1be
commit 3d08a92
Showing 1 changed file with 43 additions and 12 deletions.
diff --git a/sklearn_questions.py b/sklearn_questions.py
@@ -65,6 +65,9 @@ class KNearestNeighbors(BaseEstimator, ClassifierMixin):
     """KNearestNeighbors classifier."""
 
     def __init__(self, n_neighbors=1):  # noqa: D107
+        if n_neighbors <= 0:  # zero is rejected too, not only negatives
+            raise ValueError("n_neighbors must be a positive integer")
+
         self.n_neighbors = n_neighbors
 
     def fit(self, X, y):
@@ -82,6 +85,12 @@ def fit(self, X, y):
         self : instance of KNearestNeighbors
             The current instance of the classifier
         """
+        X, y = check_X_y(X, y)
+        check_classification_targets(y)
+        self.classes_ = np.unique(y)
+        self.n_feat_ = X.shape[1]
+        self.X_train_ = X
+        self.y_train_ = y
         return self
 
     def predict(self, X):
@@ -97,8 +106,15 @@ def predict(self, X):
         y : ndarray, shape (n_test_samples,)
             Predicted class labels for each test data sample.
         """
-        y_pred = np.zeros(X.shape[0])
-        return y_pred
+        check_is_fitted(self)
+        X = check_array(X)
+        y_pred = []
+        for k in range(X.shape[0]):
+            dist = pairwise_distances(X[k].reshape(1, -1), self.X_train_)
+            dist = np.argsort(dist)
+            neighbors = self.y_train_[dist[0][: self.n_neighbors]]
+            y_pred.append(max(set(neighbors.tolist()), key=neighbors.tolist().count))
+        return np.array(y_pred)
 
     def score(self, X, y):
         """Calculate the score of the prediction.
@@ -115,7 +131,11 @@ def score(self, X, y):
         score : float
             Accuracy of the model computed for the (X, y) pairs.
         """
-        return 0.
+        check_is_fitted(self)
+        X = check_array(X)
+        y_pred = self.predict(X)
+        score = np.mean(y_pred == y)
+        return score
 
 
 class MonthlySplit(BaseCrossValidator):
@@ -134,7 +154,7 @@ class MonthlySplit(BaseCrossValidator):
         To use the index as column just set `time_col` to `'index'`.
     """
 
-    def __init__(self, time_col='index'):  # noqa: D107
+    def __init__(self, time_col="index"):  # noqa: D107
         self.time_col = time_col
 
     def get_n_splits(self, X, y=None, groups=None):
@@ -155,7 +175,11 @@ def get_n_splits(self, X, y=None, groups=None):
         n_splits : int
             The number of splits.
         """
-        return 0
+        X = X.reset_index()
+        if not isinstance(X[self.time_col][0], pd.Timestamp):
+            raise ValueError("Not type datetime.")
+        n_splits = X[self.time_col].dt.to_period("M").nunique() - 1
+        return n_splits
 
     def split(self, X, y, groups=None):
         """Generate indices to split data into training and test set.
@@ -177,12 +201,19 @@ def split(self, X, y, groups=None):
         idx_test : ndarray
             The testing set indices for that split.
         """
-
-        n_samples = X.shape[0]
+        X = X.reset_index()
         n_splits = self.get_n_splits(X, y, groups)
+        X_s = X.resample("M", on=self.time_col).count().sort_index().index
+        y_s = X_s.map(lambda x: (x.year, x.month))
         for i in range(n_splits):
-            idx_train = range(n_samples)
-            idx_test = range(n_samples)
-            yield (
-                idx_train, idx_test
-            )
+            idx_train = X[
+                (X[self.time_col].dt.month == y_s[i][1])
+                & (X[self.time_col].dt.year == y_s[i][0])
+            ].index.to_numpy()
+
+            idx_test = X[
+                (X[self.time_col].dt.month == y_s[i + 1][1])
+                & (X[self.time_col].dt.year == y_s[i + 1][0])
+            ].index.to_numpy()
+
+            yield (idx_train, idx_test)