dataclasses=0.7.dist
transformers=2.8.0
output_dir
overwrite_output_dir
do_train
do_eval
do_predict
per_gpu_train_batch_size
per_gpu_eval_batch_size
gradient_accumulation_steps
learning_rate
weight_decay
adam_epsilon
max_grad_norm
num_train_epochs
warmup_steps
logging_dir
seed
warmup_proportion