Issue/36 metrics #37

Open · wants to merge 2 commits into base: main
Changes from all commits
.gitignore: 3 changes (2 additions, 1 deletion)
@@ -161,4 +161,5 @@ cython_debug/
 # and can be added to the global gitignore or merged into this file. For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
-.DS_Store
+.DS_Store
+.vscode/
codetf/performance/evaluation_metric.py: 29 changes (20 additions, 9 deletions)
@@ -4,17 +4,26 @@
 from sklearn.metrics import f1_score, precision_score, recall_score
 from transformers import EvalPrediction
 
+
 class EvaluationMetric:
     def __init__(self, metric, tokenizer):
         self.metric = metric
         self.tokenizer = tokenizer
 
     def compute_metrics(self, eval_pred: EvalPrediction):
-        predictions = self.tokenizer.batch_decode(eval_pred.predictions, skip_special_tokens=True)
-        references = self.tokenizer.batch_decode(eval_pred.label_ids, skip_special_tokens=True)
+        predictions = self.tokenizer.batch_decode(
+            eval_pred.predictions, skip_special_tokens=True
+        )
+        references = self.tokenizer.batch_decode(
+            eval_pred.label_ids, skip_special_tokens=True
+        )
 
         if self.metric == "bleu":
             return {"bleu": sacrebleu.corpus_bleu(predictions, [references]).score}
+        elif self.metric == "chrf":
+            return {"chrf": sacrebleu.corpus_chrf(predictions, [references]).score}
+        elif self.metric == "ter":
+            return {"ter": sacrebleu.corpus_ter(predictions, [references]).score}
         elif self.metric == "f1":
             return {"f1": self.compute_f1_score(predictions, references)}
         elif self.metric == "precision":
Expand All @@ -31,22 +40,24 @@ def compute_metrics(self, eval_pred: EvalPrediction):

     def compute_f1_score(self, hypotheses, references):
         # Calculate F1 score for your use case, this is just a sample
-        return f1_score(hypotheses, references, average='weighted')
+        return f1_score(hypotheses, references, average="weighted")
 
     def compute_precision_score(self, hypotheses, references):
         # Calculate precision score for your use case, this is just a sample
-        return precision_score(hypotheses, references, average='weighted')
+        return precision_score(hypotheses, references, average="weighted")
 
     def compute_recall_score(self, hypotheses, references):
         # Calculate recall score for your use case, this is just a sample
-        return recall_score(hypotheses, references, average='weighted')
+        return recall_score(hypotheses, references, average="weighted")
 
     def compute_rouge(self, hypotheses, references):
-        scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
+        scorer = rouge_scorer.RougeScorer(["rouge1", "rougeL"], use_stemmer=True)
         scores = [scorer.score(ref, hyp) for ref, hyp in zip(references, hypotheses)]
-        rouge1 = sum([score['rouge1'].fmeasure for score in scores]) / len(scores)
-        rougeL = sum([score['rougeL'].fmeasure for score in scores]) / len(scores)
+        rouge1 = sum([score["rouge1"].fmeasure for score in scores]) / len(scores)
+        rougeL = sum([score["rougeL"].fmeasure for score in scores]) / len(scores)
         return {"rouge1": rouge1, "rougeL": rougeL}
 
     def compute_meteor(self, hypotheses, references):
-        return sum([meteor_score([ref], hyp) for ref, hyp in zip(references, hypotheses)]) / len(hypotheses)
+        return sum(
+            [meteor_score([ref], hyp) for ref, hyp in zip(references, hypotheses)]
+        ) / len(hypotheses)
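
For context, a minimal usage sketch of the class this diff touches (not part of the PR): it assumes eval_pred.predictions already contains generated token ids, e.g. a Hugging Face Seq2SeqTrainer run with predict_with_generate=True, and the checkpoint name below is only an example.

from transformers import AutoTokenizer
from codetf.performance.evaluation_metric import EvaluationMetric

# Assumed checkpoint; any seq2seq tokenizer exposing batch_decode works the same way.
tokenizer = AutoTokenizer.from_pretrained("Salesforce/codet5-small")
metric = EvaluationMetric(metric="chrf", tokenizer=tokenizer)

# Wired into a Seq2SeqTrainer as compute_metrics=metric.compute_metrics,
# each evaluation pass would then report {"chrf": <corpus-level chrF score>}.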