From 46bfbafc66ad4b96005a14c08716e7621e7f0770 Mon Sep 17 00:00:00 2001 From: Lily W Date: Wed, 18 Dec 2024 13:40:46 -0500 Subject: [PATCH 1/3] moved n_components in cluster estimation to config.yaml --- config.yaml | 1 + modules/cluster_estimation/cluster_estimation.py | 12 +++++++++--- .../cluster_estimation/cluster_estimation_worker.py | 5 +++++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/config.yaml b/config.yaml index c10d0c4e..18132c31 100644 --- a/config.yaml +++ b/config.yaml @@ -38,6 +38,7 @@ geolocation: cluster_estimation: min_activation_threshold: 25 min_new_points_to_run: 5 + max_num_components: 10 random_state: 0 communications: diff --git a/modules/cluster_estimation/cluster_estimation.py b/modules/cluster_estimation/cluster_estimation.py index ae7ff0b3..60a9d491 100644 --- a/modules/cluster_estimation/cluster_estimation.py +++ b/modules/cluster_estimation/cluster_estimation.py @@ -28,6 +28,9 @@ class ClusterEstimation: min_new_points_to_run: int Minimum number of new data points that must be collected before running model. + max_num_components: int + Max number of real landing pads. + random_state: int Seed for randomizer, to get consistent results. 
@@ -62,8 +65,8 @@ class ClusterEstimation: __MEAN_PRECISION_PRIOR = 1e-6 __MAX_MODEL_ITERATIONS = 1000 - # Real-world scenario Hyperparameters - __MAX_NUM_COMPONENTS = 10 # assumed maximum number of real landing pads + # # Real-world scenario Hyperparameters + # __MAX_NUM_COMPONENTS = 10 # assumed maximum number of real landing pads # Hyperparameters to clean up model outputs __WEIGHT_DROP_THRESHOLD = 0.1 @@ -74,6 +77,7 @@ def create( cls, min_activation_threshold: int, min_new_points_to_run: int, + max_num_components: int, random_state: int, local_logger: logger.Logger, ) -> "tuple[bool, ClusterEstimation | None]": @@ -92,6 +96,7 @@ def create( cls.__create_key, min_activation_threshold, min_new_points_to_run, + max_num_components, random_state, local_logger, ) @@ -101,6 +106,7 @@ def __init__( class_private_create_key: object, min_activation_threshold: int, min_new_points_to_run: int, + max_num_components: int, random_state: int, local_logger: logger.Logger, ) -> None: @@ -112,7 +118,7 @@ def __init__( # Initializes VGMM self.__vgmm = sklearn.mixture.BayesianGaussianMixture( covariance_type=self.__COVAR_TYPE, - n_components=self.__MAX_NUM_COMPONENTS, + n_components = max_num_components, init_params=self.__MODEL_INIT_PARAM, weight_concentration_prior=self.__WEIGHT_CONCENTRATION_PRIOR, mean_precision_prior=self.__MEAN_PRECISION_PRIOR, diff --git a/modules/cluster_estimation/cluster_estimation_worker.py b/modules/cluster_estimation/cluster_estimation_worker.py index f10c8313..17c58765 100644 --- a/modules/cluster_estimation/cluster_estimation_worker.py +++ b/modules/cluster_estimation/cluster_estimation_worker.py @@ -14,6 +14,7 @@ def cluster_estimation_worker( min_activation_threshold: int, min_new_points_to_run: int, + max_num_components: int, random_state: int, input_queue: queue_proxy_wrapper.QueueProxyWrapper, output_queue: queue_proxy_wrapper.QueueProxyWrapper, @@ -30,6 +31,9 @@ def cluster_estimation_worker( min_new_points_to_run: int Minimum number of new 
data points that must be collected before running model. + max_num_components: int + Max number of real landing pads. + random_state: int Seed for randomizer, to get consistent results. @@ -56,6 +60,7 @@ def cluster_estimation_worker( result, estimator = cluster_estimation.ClusterEstimation.create( min_activation_threshold, min_new_points_to_run, + max_num_components, random_state, local_logger, ) From f9c10abc08507aa9d0948cdf13d90e3a282dc05d Mon Sep 17 00:00:00 2001 From: Lily W Date: Wed, 18 Dec 2024 13:46:30 -0500 Subject: [PATCH 2/3] moved n_components from cluster estimation to config.yaml --- main_2024.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/main_2024.py b/main_2024.py index 9c59908d..0e07ea52 100644 --- a/main_2024.py +++ b/main_2024.py @@ -116,6 +116,7 @@ def main() -> int: MIN_ACTIVATION_THRESHOLD = config["cluster_estimation"]["min_activation_threshold"] MIN_NEW_POINTS_TO_RUN = config["cluster_estimation"]["min_new_points_to_run"] + MAX_NUM_COMPONENTS = config["cluster_estimation"]["max_num_components"] RANDOM_STATE = config["cluster_estimation"]["random_state"] COMMUNICATIONS_TIMEOUT = config["communications"]["timeout"] @@ -303,7 +304,7 @@ def main() -> int: result, cluster_estimation_worker_properties = worker_manager.WorkerProperties.create( count=1, target=cluster_estimation_worker.cluster_estimation_worker, - work_arguments=(MIN_ACTIVATION_THRESHOLD, MIN_NEW_POINTS_TO_RUN, RANDOM_STATE), + work_arguments=(MIN_ACTIVATION_THRESHOLD, MIN_NEW_POINTS_TO_RUN, MAX_NUM_COMPONENTS, RANDOM_STATE), input_queues=[geolocation_to_cluster_estimation_queue], output_queues=[cluster_estimation_to_communications_queue], controller=controller, From af1146b9e42c4d69e371f7f6bd517f5762ae633f Mon Sep 17 00:00:00 2001 From: Lily W Date: Sun, 22 Dec 2024 19:35:10 -0500 Subject: [PATCH 3/3] added check verifying max_num_components > 0 --- modules/cluster_estimation/cluster_estimation.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) 
diff --git a/modules/cluster_estimation/cluster_estimation.py b/modules/cluster_estimation/cluster_estimation.py index 60a9d491..b1d8c652 100644 --- a/modules/cluster_estimation/cluster_estimation.py +++ b/modules/cluster_estimation/cluster_estimation.py @@ -65,9 +65,6 @@ class ClusterEstimation: __MEAN_PRECISION_PRIOR = 1e-6 __MAX_MODEL_ITERATIONS = 1000 - # # Real-world scenario Hyperparameters - # __MAX_NUM_COMPONENTS = 10 # assumed maximum number of real landing pads - # Hyperparameters to clean up model outputs __WEIGHT_DROP_THRESHOLD = 0.1 __MAX_COVARIANCE_THRESHOLD = 10 @@ -92,6 +89,10 @@ def create( if min_activation_threshold < 1: return False, None + # Must be at least 1 (the mixture model needs a component) + if max_num_components < 1: + return False, None + return True, ClusterEstimation( cls.__create_key, min_activation_threshold,