-
Notifications
You must be signed in to change notification settings - Fork 180
/
config.yaml
38 lines (38 loc) · 1.34 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# Run-level settings: where the components live and how the run is labelled.
main:
  # Repository URL; the "#components" suffix is part of the quoted value.
  components_repository: "https://github.com/udacity/Project-Build-an-ML-Pipeline-Starter.git#components"
  # All the intermediate files will be copied to this directory at the end of
  # the run. Set this to null if you are running in prod
  # NOTE(review): no directory key is defined in this section, so the two
  # lines above appear orphaned -- confirm whether a key was removed.
  project_name: nyc_airbnb
  experiment_name: development
  # NOTE(review): presumably selects which pipeline steps run -- confirm the
  # accepted values against the code that reads this key.
  steps: all
# Settings for the etl section: input sample file and price bounds.
etl:
  sample: "sample1.csv"
  # NOTE(review): presumably the price range kept by the cleaning step --
  # confirm against the consumer of these values.
  min_price: 10  # dollars
  max_price: 350  # dollars
# Settings for the data_check section.
data_check:
  # NOTE(review): presumably a KL-divergence threshold used by a data test --
  # confirm against the consumer of this value.
  kl_threshold: 0.2
# Settings for the modeling section: data splitting, text features and the
# random forest hyperparameters.
modeling:
  # Fraction of data to use for test (the remaining will be used for train
  # and validation)
  test_size: 0.2
  # Fraction of remaining data to use for validation
  val_size: 0.2
  # Fix this for reproducibility, change to have new splits
  random_seed: 42
  # Column to use for stratification (use "none" for no stratification)
  stratify_by: "neighbourhood_group"
  # Maximum number of features to consider for the TFIDF applied to the title
  # of the insertion (the column called "name")
  max_tfidf_features: 5
  # NOTE: you can put here any parameter that is accepted by the constructor
  # of RandomForestRegressor. This is a subsample, but more could be added:
  random_forest:
    n_estimators: 100
    max_depth: 15
    min_samples_split: 4
    min_samples_leaf: 3
    # Here -1 means all available cores
    n_jobs: -1
    criterion: squared_error
    max_features: 0.5
    # Do not change the following
    oob_score: true