Skip to content

Commit

Permalink
UP my solution
Browse files Browse the repository at this point in the history
  • Loading branch information
BusraBulut222 committed Dec 21, 2023
1 parent 6ccb1be commit 3d08a92
Showing 1 changed file with 43 additions and 12 deletions.
55 changes: 43 additions & 12 deletions sklearn_questions.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ class KNearestNeighbors(BaseEstimator, ClassifierMixin):
"""KNearestNeighbors classifier."""

def __init__(self, n_neighbors=1):
    """Store the number of neighbors used for the majority vote.

    Parameters
    ----------
    n_neighbors : int, default=1
        Number of nearest training samples consulted for each prediction.
        Must be strictly positive.

    Raises
    ------
    ValueError
        If `n_neighbors` is zero or negative.
    """
    # NOTE(review): scikit-learn convention is to validate hyper-parameters
    # in `fit` (values set through `set_params` bypass this check). The
    # eager check is kept so existing callers still fail fast.
    if n_neighbors <= 0:
        raise ValueError(
            f"n_neighbors must be a positive integer, got {n_neighbors!r}"
        )

    self.n_neighbors = n_neighbors

def fit(self, X, y):
    """Memorize the training set used for nearest-neighbor lookups.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training samples.
    y : array-like, shape (n_samples,)
        Class label of each training sample.

    Returns
    -------
    self : instance of KNearestNeighbors
        The fitted classifier, with `classes_`, `n_feat_`, `X_train_`
        and `y_train_` set.
    """
    # Validation happens first so that nothing is stored on the instance
    # when the inputs are rejected.
    train_X, train_y = check_X_y(X, y)
    check_classification_targets(train_y)

    self.classes_ = np.unique(train_y)
    self.n_feat_ = train_X.shape[1]
    self.X_train_, self.y_train_ = train_X, train_y
    return self

def predict(self, X):
    """Predict a class label for every row of `X` by majority vote.

    Parameters
    ----------
    X : array-like, shape (n_test_samples, n_features)
        Samples to classify.

    Returns
    -------
    y : ndarray, shape (n_test_samples,)
        Predicted class labels for each test data sample. When several
        labels are equally frequent among the neighbors, the smallest
        label (in sorted order) is returned.
    """
    check_is_fitted(self)
    X = check_array(X)

    # A single call yields the full (n_test, n_train) distance matrix
    # instead of one call per test row.
    distances = pairwise_distances(X, self.X_train_)
    # A stable sort keeps equidistant training points in training order,
    # so the selected neighbors are reproducible.
    nearest = np.argsort(distances, axis=1, kind="stable")
    neighbor_labels = self.y_train_[nearest[:, : self.n_neighbors]]

    y_pred = np.empty(X.shape[0], dtype=self.y_train_.dtype)
    for row, labels in enumerate(neighbor_labels):
        # np.unique returns sorted labels and argmax picks the first
        # maximum, which makes tie-breaking deterministic.
        values, counts = np.unique(labels, return_counts=True)
        y_pred[row] = values[np.argmax(counts)]
    return y_pred

def score(self, X, y):
    """Return the accuracy of the classifier on the given samples.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Samples to classify.
    y : array-like, shape (n_samples,)
        True class label of each sample.

    Returns
    -------
    score : float
        Fraction of samples in `X` whose predicted label equals `y`.

    Raises
    ------
    ValueError
        If `X` and `y` are malformed or have inconsistent lengths.
    """
    check_is_fitted(self)
    # Validating X and y together rejects a length mismatch up front
    # rather than leaving it to the elementwise comparison below.
    X, y = check_X_y(X, y)
    return float(np.mean(self.predict(X) == y))


class MonthlySplit(BaseCrossValidator):
Expand All @@ -134,7 +154,7 @@ class MonthlySplit(BaseCrossValidator):
To use the index as column just set `time_col` to `'index'`.
"""

def __init__(self, time_col="index"):
    """Store the name of the datetime column that drives the split.

    Parameters
    ----------
    time_col : str, default='index'
        Column holding the timestamps; `'index'` means the index of the
        input is used.
    """
    self.time_col = time_col

def get_n_splits(self, X, y=None, groups=None):
    """Return the number of splitting iterations in the cross-validator.

    Parameters
    ----------
    X : DataFrame or Series
        Data whose `time_col` column (or index) holds the timestamps.
    y : array-like, default=None
        Ignored; present for API compatibility.
    groups : array-like, default=None
        Ignored; present for API compatibility.

    Returns
    -------
    n_splits : int
        The number of splits: one fewer than the number of distinct
        (year, month) pairs present in the data, and never negative.

    Raises
    ------
    KeyError
        If `time_col` is neither a column nor the index of `X`.
    ValueError
        If the selected column or index is not of datetime dtype.
    """
    # A real column wins over the index, so callers that already moved the
    # index into a column with `reset_index()` keep working.
    if self.time_col in getattr(X, "columns", ()):
        times = X[self.time_col]
    elif self.time_col == "index" or X.index.name == self.time_col:
        # Also covers a *named* index, which `reset_index()` would expose
        # under its own name rather than under 'index'.
        times = X.index.to_series()
    else:
        raise KeyError(self.time_col)

    # Check the dtype of the whole column rather than its first element,
    # which also works on empty input.
    if not pd.api.types.is_datetime64_any_dtype(times):
        raise ValueError("Not type datetime.")

    # One integer per calendar month; missing timestamps become NaN and
    # are ignored by nunique().
    month_key = times.dt.year * 12 + times.dt.month
    return max(int(month_key.nunique()) - 1, 0)

def split(self, X, y, groups=None):
    """Generate indices to split data into training and test set.

    Each split trains on one calendar month and tests on the next month
    that actually contains data, in chronological order.

    Parameters
    ----------
    X : DataFrame or Series
        Data whose `time_col` column (or index) holds the timestamps.
    y : array-like
        Ignored; present for API compatibility.
    groups : array-like, default=None
        Ignored; present for API compatibility.

    Yields
    ------
    idx_train : ndarray
        The training set indices for that split (row positions in `X`).
    idx_test : ndarray
        The testing set indices for that split (row positions in `X`).

    Raises
    ------
    KeyError
        If `time_col` is neither a column nor the index of `X`.
    ValueError
        If the selected column or index is not of datetime dtype.
    """
    # A real column wins over the index; otherwise fall back to the index,
    # whether it is unnamed or named after `time_col`.
    if self.time_col in getattr(X, "columns", ()):
        times = X[self.time_col]
    elif self.time_col == "index" or X.index.name == self.time_col:
        times = X.index.to_series()
    else:
        raise KeyError(self.time_col)

    if not pd.api.types.is_datetime64_any_dtype(times):
        raise ValueError("Not type datetime.")

    # One integer per calendar month; missing timestamps become NaN and
    # therefore belong to no fold.
    month_key = (times.dt.year * 12 + times.dt.month).to_numpy()
    # Only months present in the data are listed (sorted), so calendar
    # gaps never yield empty folds and the number of folds matches the
    # count of distinct months minus one.
    months = np.unique(month_key[~pd.isna(month_key)])
    positions = [np.flatnonzero(month_key == month) for month in months]

    for idx_train, idx_test in zip(positions, positions[1:]):
        yield idx_train, idx_test

0 comments on commit 3d08a92

Please sign in to comment.