-
Notifications
You must be signed in to change notification settings - Fork 0
/
config.ini
53 lines (48 loc) · 1.71 KB
/
config.ini
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# This configuration file follows the INI file structure supported by
# Python's configparser module:
# https://docs.python.org/3/library/configparser.html#supported-ini-file-structure
[DATA]
# Locations of the input data files and directories.
# NOTE(review): all values are relative paths; presumably they are resolved
# against the directory the scripts are run from - confirm.
# Directory with the Spacy Vocabulary files
Spacy Vocab Dir = ex_data/vocab
# Spacy KnowledgeBase file
Spacy KB = ex_data/kb
# File with the CoNLL data annotated with Wikidata QIDs
Conll Annotated = ex_data/conll/conll-wikidata-iob-annotations
# File (.tsv) mapping Wikidata QIDs to Wikipedia abstracts
Wikipedia Abstracts = ex_data/yi-chun/wikidata-wikipedia.tsv
# If you have run these scripts before,
# this is the file generated by the candidate generator.
# NOTE(review): presumably this is also where the file is written on a first
# run - confirm against the candidate generator.
Candidate Info = data/docs_entities_info.json
[INPUT VECTORS]
# Settings for the generated input vectors.
# Boolean switch for the balanced dataset; the settings further down marked
# "If Use Balanced Dataset = True" only apply when this is True.
Use Balanced Dataset = False
# Directory to read or write generated input vectors
Input Vectors Dir = data/vectors
# If Use Balanced Dataset = True:
# Directory to read or write balanced dataset input vectors
Balanced Dataset Dir = data/balanced_dataset
# Number of negative samples in balanced dataset (determines pos:neg-ratio)
# NOTE(review): presumably counted per positive sample, so 1 would give a
# 1:1 ratio - confirm against the code that builds the balanced dataset.
N Negative Samples = 1
[BERT]
# Settings for the BERT model: where it comes from and where results are saved.
# Name of the model if fetched from Huggingface:
Model ID = bert-base-uncased
# Maximum length of an input sequence.
# NOTE(review): presumably measured in tokens, and 512 is likely the upper
# limit for bert-base-uncased - confirm before raising it.
Max Sequence Length = 512
# Directory to read BERT model from
# NOTE(review): it is not visible here when this directory is used instead of
# fetching Model ID from Huggingface - check the model loading code.
Bert Model Dir = models/bert_ned
# Directory to write trained models to
Save Model Dir = models/trained
[TRAINING]
# Hyperparameters and dataset split used for training.
# Number of training epochs
Epochs = 5
# Number of encoder transformers to freeze before training (max 12)
Freeze N Transformers = 8
# Number of samples per batch.
# NOTE(review): presumably applies to training; confirm whether validation
# and testing use the same batch size.
Batch Size = 24
# Use default document split of CoNLL dataset
Use Default Split = True
# If Use Default Split = False:
# Sizes of the training, validation and test sets.
# NOTE(review): the values look like fractions of the dataset, and the three
# values below sum to 0.3 rather than 1.0 - confirm that using only part of
# the data is intended.
Training Set Size = 0.1
Validation Set Size = 0.1
Test Set Size = 0.1
[VERBOSITY]
# How often progress is reported.
# Frequency of progress updates during training in number of steps
Training Update Frequency = 100
# Frequency of progress updates during validation and testing.
# NOTE(review): presumably also in number of steps, like the training
# setting - confirm.
Validation Update Frequency = 50
Test Update Frequency = 50