Skip to content

Commit

Permalink
Third assignment
Browse files Browse the repository at this point in the history
  • Loading branch information
IonPanteleiciuc committed Dec 21, 2023
1 parent 6ccb1be commit 37f4df5
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 11 deletions.
7 changes: 7 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"python.testing.pytestArgs": [
"."
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true
}
60 changes: 49 additions & 11 deletions sklearn_questions.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
from sklearn.utils.validation import check_array
from sklearn.utils.multiclass import check_classification_targets
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.utils.multiclass import unique_labels


class KNearestNeighbors(BaseEstimator, ClassifierMixin):
def fit(self, X, y):
    """Fit the classifier by storing the validated training data.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training samples.
    y : array-like, shape (n_samples,)
        Class label of each training sample.

    Returns
    -------
    self : instance of KNearestNeighbors
        The current instance of the classifier

    Raises
    ------
    ValueError
        If ``y`` contains fewer than two classes, or if ``n_neighbors``
        is larger than the number of training samples.
    """
    X, y = check_X_y(X, y)
    check_classification_targets(y)

    classes = unique_labels(y)
    if len(classes) < 2:
        raise ValueError("You need more than one class.")
    if self.n_neighbors > X.shape[0]:
        # Implicit literal concatenation keeps the message free of the
        # source indentation a backslash continuation would embed.
        raise ValueError(
            "Number of neighbors cannot be greater than "
            "the number of training samples."
        )

    # Fitted attributes are set only once every check has passed, so a
    # failed fit never leaves the estimator looking fitted.
    self.n_features_in_ = X.shape[1]
    self.classes_ = classes
    self.X_ = X
    self.y_ = y
    return self

def predict(self, X):
    """Predict a class label for each sample by majority vote.

    Each test sample receives the most frequent label among its
    ``n_neighbors`` closest training samples. When several labels are
    equally frequent, the smallest one (in ``np.unique`` order) wins.

    Parameters
    ----------
    X : array-like, shape (n_test_samples, n_features)
        Samples to classify.

    Returns
    -------
    y : ndarray, shape (n_test_samples,)
        Predicted class labels for each test data sample.
    """
    check_is_fitted(self)
    X = check_array(X)

    # One distance matrix for all queries instead of one call per sample.
    distances = pairwise_distances(X, self.X_)
    nearest = np.argsort(distances, axis=1)[:, :self.n_neighbors]

    y_pred = []
    for neighbor_labels in self.y_[nearest]:
        values, counts = np.unique(neighbor_labels, return_counts=True)
        y_pred.append(values[np.argmax(counts)])
    return np.array(y_pred)

def score(self, X, y):
    """Compute the accuracy of the classifier on ``(X, y)``.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Samples to classify.
    y : array-like, shape (n_samples,)
        True class label of each sample.

    Returns
    -------
    score : float
        Accuracy of the model computed for the (X, y) pairs.

    Raises
    ------
    ValueError
        If ``y`` does not have the same shape as the predictions.
    """
    # predict() already checks the fitted state and validates X, so the
    # checks are not repeated here.
    y_pred = self.predict(X)
    y_true = np.asarray(y)
    if y_true.shape != y_pred.shape:
        # Avoid silent broadcasting producing a meaningless accuracy.
        raise ValueError(
            f"y has shape {y_true.shape} but predictions have shape "
            f"{y_pred.shape}."
        )
    return float(np.mean(y_true == y_pred))


class MonthlySplit(BaseCrossValidator):
def get_n_splits(self, X, y=None, groups=None):
    """Return the number of splitting iterations in the cross-validator.

    One split is produced for every pair of consecutive months that
    contain data, so the result is the number of distinct (year, month)
    pairs found in the time column minus one.

    Parameters
    ----------
    X : DataFrame
        Data holding the datetime information, either in the column named
        ``self.time_col`` or, when ``self.time_col == 'index'``, in its
        index.
    y : object
        Unused; accepted for API compatibility.
    groups : object
        Unused; accepted for API compatibility.

    Returns
    -------
    n_splits : int
        The number of splits.

    Raises
    ------
    ValueError
        If the time column (or index) does not hold datetimes.
    """
    # A real column called 'index' takes precedence, so frames that were
    # already passed through reset_index() keep working.
    if self.time_col == "index" and "index" not in X.columns:
        times = pd.Series(X.index)
    else:
        times = X[self.time_col]

    if not pd.api.types.is_datetime64_any_dtype(times):
        raise ValueError(
            f"The column '{self.time_col}' is not a datetime."
        )

    # Combine year and month so the same month in different years counts
    # as two distinct periods; no sorting or copy of X is required.
    month_ids = times.dt.year * 12 + times.dt.month
    return max(int(month_ids.nunique()) - 1, 0)

def split(self, X, y, groups=None):
    """Generate indices to split data into training and test set.

    Samples are grouped by calendar month (year and month together). For
    every pair of consecutive months that contain data, the earlier month
    is the training set and the later month is the test set. Months with
    no samples are skipped rather than producing empty folds.

    Parameters
    ----------
    X : DataFrame
        Data holding the datetime information, either in the column named
        ``self.time_col`` or, when ``self.time_col == 'index'``, in its
        index.
    y : object
        Unused; accepted for API compatibility.
    groups : object
        Unused; accepted for API compatibility.

    Yields
    ------
    idx_train : ndarray
        The training set indices for that split.
    idx_test : ndarray
        The testing set indices for that split.

    Raises
    ------
    ValueError
        If the time column (or index) does not hold datetimes.
    """
    # A real column called 'index' takes precedence over the frame index.
    if self.time_col == "index" and "index" not in X.columns:
        times = pd.Series(X.index)
    else:
        times = X[self.time_col]

    if not pd.api.types.is_datetime64_any_dtype(times):
        raise ValueError(
            f"The column '{self.time_col}' is not a datetime."
        )

    # Re-label rows 0..n-1 so the yielded indices are positional.
    times = times.reset_index(drop=True)
    chronological = times.sort_values(kind="stable").index

    # year * 12 + month increases with time and keeps the same month of
    # different years apart.
    month_ids = (times.dt.year * 12 + times.dt.month).loc[chronological]

    # groupby sorts the month keys and keeps the chronological row order
    # inside each month; only months that actually occur form a group.
    folds = [
        month_rows.index.to_numpy()
        for _, month_rows in month_ids.groupby(month_ids.to_numpy())
    ]
    for idx_train, idx_test in zip(folds, folds[1:]):
        yield idx_train, idx_test

0 comments on commit 37f4df5

Please sign in to comment.