forked from YahooArchive/anthelion
-
Notifications
You must be signed in to change notification settings - Fork 0
/
anth_classifier_baseline.properties
executable file
·40 lines (40 loc) · 2.07 KB
/
anth_classifier_baseline.properties
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# The bandit lambda, which sets the proportion of random selections.
bandit.lambda=0.0
# The size of the domain queue used within Anthelion. As the domains are
# kept in cache anyway and the queue just holds references to them, the size
# should only slightly influence the memory needed. Because the score of a
# domain also takes into account the number of times the same domain is
# enqueued, this setting affects the variety of domains picked.
domain.queue.size=1000
# The minimum number of domains that must have been added to the overall
# knownDomain list before Anthelion starts considering any domain for
# enqueuing.
domain.known.min=1
# The time to wait when adding a domain to the queue (waiting may be
# necessary because the size of the queue is limited).
# Unit: seconds.
domain.queue.offertime=3600
# Class name of the value function to use.
# The class must implement the DomainValueFunction interface
# (DomainValueFunction.java).
domain.value.function=com.yahoo.research.robme.anthelion.models.banditfunction.BestScoreFunction
# The number of attributes used for the hash trick.
classifier.hashtricksize=10000
# The name of the classifier to use.
# Possible values: NaiveBayes, DecisionStump, HoeffdingTree, HoeffdingAdaptiveTree, AdaHoeffdingTree
classifier.name=NaiveBayes
# The options for the classifier (null or a string, e.g. "-z").
# NOTE(review): the value is left empty here; confirm how the loader
# interprets an empty value (as opposed to the literal "null").
classifier.options=
# The number of instances to wait for between learning steps of the
# classifier. Has only a small effect.
classifier.learn.batchsize=5
# The classification mode to use. Currently implemented: OnDemand and OnExplore.
classifier.mode=OnExplore
# The penalty given to domains that have too many outstanding URLs for which
# no feedback has been received yet. This setting is specific to the bandit
# function.
domain.value.function.missingfeedbackpenalty=0.1
# The maximum fraction of the queue that may be occupied by the same domain.
domain.queue.fraction=0.5
# The minimum number of URLs that should be in the input queue before the
# Anthelion module starts working.
inputlist.size=5
# Time in milliseconds the crawler waits when it has nothing to do before
# checking again whether there is something new.
crawler.waitingtime=100