From 5b310f43630baf1471980c692bf5dc079dcac73e Mon Sep 17 00:00:00 2001
From: Bo Li <drluodian@gmail.com>
Date: Wed, 4 Sep 2024 15:11:58 +0000
Subject: [PATCH] fix: fix wrong args in wandb logger

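This commit reworks how evaluation results are logged to Weights and Biases in cli_evaluate. The post_init / log_eval_result / log_eval_samples calls are now wrapped in their own try/except, so a logging failure is reported through eval_logger instead of propagating to the outer exception handler. The per-iteration WandbLogger construction and the per-iteration finish() call are commented out; the run is instead finished once, after the evaluation loop, when wandb_args is set. This allows for tracking and monitoring of evaluation results.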
---
 lmms_eval/__main__.py | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/lmms_eval/__main__.py b/lmms_eval/__main__.py
index 97045e5f..67d0f572 100755
--- a/lmms_eval/__main__.py
+++ b/lmms_eval/__main__.py
@@ -321,20 +321,22 @@ def cli_evaluate(args: Union[argparse.Namespace, None] = None) -> None:
 
     for args in args_list:
         try:
-            if is_main_process and args.wandb_args:  # thoughtfully we should only init wandb once, instead of multiple ranks to avoid network traffics and unwanted behaviors.
-                wandb_logger = WandbLogger(args)
+            # if is_main_process and args.wandb_args:  # thoughtfully we should only init wandb once, instead of multiple ranks to avoid network traffics and unwanted behaviors.
+            #     wandb_logger = WandbLogger()
 
             results, samples = cli_evaluate_single(args)
             results_list.append(results)
 
             accelerator.wait_for_everyone()
             if is_main_process and args.wandb_args:
-                wandb_logger.post_init(results)
-                wandb_logger.log_eval_result()
-                if args.wandb_log_samples and samples is not None:
-                    wandb_logger.log_eval_samples(samples)
-
-                wandb_logger.finish()
+                try:
+                    wandb_logger.post_init(results)
+                    wandb_logger.log_eval_result()
+                    if args.wandb_log_samples and samples is not None:
+                        wandb_logger.log_eval_samples(samples)
+                except Exception as e:
+                    eval_logger.info(f"Logging to Weights and Biases failed due to {e}")
+                # wandb_logger.finish()
 
         except Exception as e:
             if args.verbosity == "DEBUG":
@@ -349,6 +351,9 @@ def cli_evaluate(args: Union[argparse.Namespace, None] = None) -> None:
         if results is not None:
             print_results(args, results)
 
+    if args.wandb_args:
+        wandb_logger.run.finish()
+
 
 def cli_evaluate_single(args: Union[argparse.Namespace, None] = None) -> None:
     selected_task_list = args.tasks.split(",") if args.tasks else None